media/libvpx/vp8/encoder/mcomp.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/media/libvpx/vp8/encoder/mcomp.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,2029 @@
     1.4 +/*
     1.5 + *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     1.6 + *
     1.7 + *  Use of this source code is governed by a BSD-style license
     1.8 + *  that can be found in the LICENSE file in the root of the source
     1.9 + *  tree. An additional intellectual property rights grant can be found
    1.10 + *  in the file PATENTS.  All contributing project authors may
    1.11 + *  be found in the AUTHORS file in the root of the source tree.
    1.12 + */
    1.13 +
    1.14 +
    1.15 +#include "onyx_int.h"
    1.16 +#include "mcomp.h"
    1.17 +#include "vpx_mem/vpx_mem.h"
    1.18 +#include "vpx_config.h"
    1.19 +#include <stdio.h>
    1.20 +#include <limits.h>
    1.21 +#include <math.h>
    1.22 +#include "vp8/common/findnearmv.h"
    1.23 +
    1.24 +#ifdef VP8_ENTROPY_STATS /* the two tables below exist only when this macro is defined */
    1.25 +static int mv_ref_ct [31] [4] [2]; /* NOTE(review): presumably mv reference-mode counters; users are outside this section - confirm */
    1.26 +static int mv_mode_cts [4] [2]; /* NOTE(review): presumably per-mode counters; users are outside this section - confirm */
    1.27 +#endif /* VP8_ENTROPY_STATS */
    1.28 +
    1.29 +/* Cost of coding mv relative to ref: row and column cost-table entries,
    1.30 + * summed and scaled by Weight / 128.  The tables follow the previous
    1.31 + * frame's vector distribution and so tend to overstate the cost; a new
    1.32 + * vector also affects later vectors and NEAR/NEAREST prediction, which
    1.33 + * Weight lets the caller account for to a limited extent. */
    1.34 +int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight)
    1.35 +{
    1.36 +    const int row_idx = (mv->as_mv.row - ref->as_mv.row) >> 1;
    1.37 +    const int col_idx = (mv->as_mv.col - ref->as_mv.col) >> 1;
    1.38 +    return ((mvcost[0][row_idx] + mvcost[1][col_idx]) * Weight) >> 7;
    1.39 +}
    1.40 +
    1.41 +/* Rate term for mv relative to ref: (row + column table cost) times
    1.42 + * error_per_bit, with 128 added before the >> 8.  NULL mvcost gives 0. */
    1.43 +static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit)
    1.44 +{
    1.45 +    int table_cost;
    1.46 +    if (!mvcost) return 0;
    1.47 +    table_cost = mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1];
    1.48 +    return (table_cost * error_per_bit + 128) >> 8;
    1.49 +}
    1.50 +
    1.51 +static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit)
    1.52 +{
    1.53 +    /* Calculate sad error cost on full pixel basis. */
    1.54 +    /* Ignore mv costing if mvsadcost is NULL */
    1.55 +    if (mvsadcost)
    1.56 +        return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
    1.57 +                 mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)])
    1.58 +                * error_per_bit + 128) >> 8;
    1.59 +    return 0;
    1.60 +}
    1.61 +
    1.62 +/* Fill the search-site table x->ss for a search that tests four sites
    1.63 + * per step (row - step, row + step, col - step, col + step).  Site 0 is
    1.64 + * the centre; the step starts at MAX_FIRST_STEP and is halved until it
    1.65 + * reaches zero.  Each site stores its (row, col) displacement and the
    1.66 + * matching buffer offset for the given stride.
    1.67 + */
    1.68 +void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
    1.69 +{
    1.70 +    /* Unit directions, in the order the sites are stored. */
    1.71 +    static const int dir_row[4] = { -1, 1,  0, 0 };
    1.72 +    static const int dir_col[4] = {  0, 0, -1, 1 };
    1.73 +    int step;
    1.74 +    int d;
    1.75 +    int n = 0;
    1.76 +
    1.77 +    /* Centre site: zero displacement, zero offset. */
    1.78 +    x->ss[n].mv.col = 0;
    1.79 +    x->ss[n].mv.row = 0;
    1.80 +    x->ss[n].offset = 0;
    1.81 +    n++;
    1.82 +
    1.83 +    for (step = MAX_FIRST_STEP; step > 0; step /= 2)
    1.84 +    {
    1.85 +        for (d = 0; d < 4; d++)
    1.86 +        {
    1.87 +            const int row = dir_row[d] * step;
    1.88 +            const int col = dir_col[d] * step;
    1.89 +
    1.90 +            x->ss[n].mv.col = col;
    1.91 +            x->ss[n].mv.row = row;
    1.92 +            x->ss[n].offset = row * stride + col;
    1.93 +            n++;
    1.94 +        }
    1.95 +    }
    1.96 +
    1.97 +    x->ss_count = n;
    1.98 +    x->searches_per_step = 4;
    1.99 +}
   1.109 +
   1.110 +/* Fill the search-site table x->ss for a search that tests eight sites
   1.111 + * per step: the four axis neighbours followed by the four diagonal
   1.112 + * neighbours.  Site 0 is the centre; the step starts at MAX_FIRST_STEP and
   1.113 + * is halved until it reaches zero.  Each site stores its (row, col)
   1.114 + * displacement and the matching buffer offset for the given stride.
   1.115 + */
   1.116 +void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
   1.117 +{
   1.118 +    /* Unit directions, in the order the sites are stored. */
   1.119 +    static const int dir_row[8] = { -1, 1,  0, 0, -1, -1,  1, 1 };
   1.120 +    static const int dir_col[8] = {  0, 0, -1, 1, -1,  1, -1, 1 };
   1.121 +    int step;
   1.122 +    int d;
   1.123 +    int n = 0;
   1.124 +
   1.125 +    /* Centre site: zero displacement, zero offset. */
   1.126 +    x->ss[n].mv.col = 0;
   1.127 +    x->ss[n].mv.row = 0;
   1.128 +    x->ss[n].offset = 0;
   1.129 +    n++;
   1.130 +
   1.131 +    for (step = MAX_FIRST_STEP; step > 0; step /= 2)
   1.132 +    {
   1.133 +        for (d = 0; d < 8; d++)
   1.134 +        {
   1.135 +            const int row = dir_row[d] * step;
   1.136 +            const int col = dir_col[d] * step;
   1.137 +
   1.138 +            x->ss[n].mv.col = col;
   1.139 +            x->ss[n].mv.row = row;
   1.140 +            x->ss[n].offset = row * stride + col;
   1.141 +            n++;
   1.142 +        }
   1.143 +    }
   1.144 +
   1.145 +    x->ss_count = n;
   1.146 +    x->searches_per_step = 8;
   1.147 +}
   1.181 +
   1.182 +/*
   1.183 + * To avoid the penalty of reads that cross a cache line, the reference area
   1.184 + * is preloaded into a small buffer that is aligned so that reads from it
   1.185 + * never cross a cache line. This reduces the cpu cycles spent on reading
   1.186 + * ref data in the sub-pixel filter functions.
   1.187 + * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, a 22 rows
   1.188 + * x 32 cols area is copied, which is enough for a 16x16 macroblock. Later,
   1.189 + * for SPLITMV, we could reduce the area.
   1.190 + */
   1.191 +
   1.192 +/* Estimated cost of a motion vector (r,c): cost-table lookups relative to (rr,rc), times error_per_bit, plus 128, shifted right by 8; 0 when mvcost is NULL. All of these macros use local variables of the function that follows them. */
   1.193 +#define MVC(r,c) (mvcost ? ((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 : 0)
   1.194 +/* Pointer to the predictor for motion vector (r,c): row r>>2 and column c>>2 in the buffer at y, less the starting offset. */
   1.195 +#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset)))
   1.196 +/* Convert a motion vector component to the offset passed to svf: its low two bits, doubled. */
   1.197 +#define SP(x) (((x)&3)<<1)
   1.198 +/* Result of the sub-pixel variance function vfp->svf for the predictor at (r,c) against z; svf is also handed &sse. */
   1.199 +#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse)
   1.200 +#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e; /* run s when (r,c) lies inside [minr,maxr] x [minc,maxc], otherwise e */
   1.201 +/* Returns distortion + motion vector cost. */
   1.202 +#define ERR(r,c) (MVC(r,c)+DIST(r,c))
   1.203 +/* If (r,c) is in bounds, store its score in v and, when it beats besterr, record it as the new best (besterr, br, bc, *distortion, *sse1); out-of-bounds points get v = UINT_MAX. */
   1.204 +#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=UINT_MAX;)
   1.205 +/* Refine the full-pel vector in *bestmv to sub-pixel precision: up to three half-step passes, then up to three quarter-step passes, each testing four neighbours plus one diagonal. Updates *bestmv, *distortion and *sse1; returns the best error, or INT_MAX when the result is too far from ref_mv. */
   1.206 +int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   1.207 +                                             int_mv *bestmv, int_mv *ref_mv,
   1.208 +                                             int error_per_bit,
   1.209 +                                             const vp8_variance_fn_ptr_t *vfp,
   1.210 +                                             int *mvcost[2], int *distortion,
   1.211 +                                             unsigned int *sse1)
   1.212 +{
   1.213 +    unsigned char *z = (*(b->base_src) + b->src);
   1.214 +    /* rr,rc: ref_mv halved; br,bc: best position so far; tr,tc: centre of the current pass; br/bc/tr/tc are the input bestmv times 4. */
   1.215 +    int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
   1.216 +    int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
   1.217 +    int tr = br, tc = bc;
   1.218 +    unsigned int besterr;
   1.219 +    unsigned int left, right, up, down, diag;
   1.220 +    unsigned int sse;
   1.221 +    unsigned int whichdir;
   1.222 +    unsigned int halfiters = 4;
   1.223 +    unsigned int quarteriters = 4;
   1.224 +    int thismse;
   1.225 +    /* Search window: x's mv limits times 4, intersected with (ref_mv >> 1) +/- ((1 << mvlong_width) - 1). */
   1.226 +    int minc = MAX(x->mv_col_min * 4,
   1.227 +                   (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
   1.228 +    int maxc = MIN(x->mv_col_max * 4,
   1.229 +                   (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
   1.230 +    int minr = MAX(x->mv_row_min * 4,
   1.231 +                   (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
   1.232 +    int maxr = MIN(x->mv_row_max * 4,
   1.233 +                   (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
   1.234 +
   1.235 +    int y_stride;
   1.236 +    int offset;
   1.237 +    int pre_stride = x->e_mbd.pre.y_stride;
   1.238 +    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1.239 +
   1.240 +    /* x86 builds search a copy of the reference area held in xd->y_buf (stride 32); other builds read x->e_mbd.pre.y_buffer directly. */
   1.241 +#if ARCH_X86 || ARCH_X86_64
   1.242 +    MACROBLOCKD *xd = &x->e_mbd;
   1.243 +    unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
   1.244 +    unsigned char *y;
   1.245 +    int buf_r1, buf_r2, buf_c1;
   1.246 +
   1.247 +    /* Clamping to avoid out-of-range data access: fewer than 3 extra rows/columns are taken when bestmv is within 3 of a row/column limit. */
   1.248 +    buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3;
   1.249 +    buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3;
   1.250 +    buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3;
   1.251 +    y_stride = 32;
   1.252 +
   1.253 +    /* Copy 16 + buf_r1 + buf_r2 rows, starting buf_r1 rows above and buf_c1 columns left of the bestmv position, to the intermediate buffer before searching. */
   1.254 +    vfp->copymem(y_0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2);
   1.255 +    y = xd->y_buf + y_stride*buf_r1 +buf_c1;
   1.256 +#else
   1.257 +    unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
   1.258 +    y_stride = pre_stride;
   1.259 +#endif
   1.260 +    /* y already points at the bestmv position, so PRE() subtracts this offset from the position it computes from (r>>2, c>>2). */
   1.261 +    offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
   1.262 +
   1.263 +    /* central mv (the input vector scaled by 8) */
   1.264 +    bestmv->as_mv.row *= 8;
   1.265 +    bestmv->as_mv.col *= 8;
   1.266 +
   1.267 +    /* calculate central point error */
   1.268 +    besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
   1.269 +    *distortion = besterr;
   1.270 +    besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
   1.271 +
   1.272 +    /* TODO: Each subsequent iteration checks at least one point in common
   1.273 +     * with the last iteration could be 2 ( if diag selected)
   1.274 +     */
   1.275 +    while (--halfiters) /* at most 3 passes, since halfiters starts at 4 */
   1.276 +    {
   1.277 +        /* 1/2 pel: test the four neighbours 2 units away from (tr,tc) */
   1.278 +        CHECK_BETTER(left, tr, tc - 2);
   1.279 +        CHECK_BETTER(right, tr, tc + 2);
   1.280 +        CHECK_BETTER(up, tr - 2, tc);
   1.281 +        CHECK_BETTER(down, tr + 2, tc);
   1.282 +        /* then the one diagonal between the cheaper horizontal and the cheaper vertical neighbour */
   1.283 +        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
   1.284 +
   1.285 +        switch (whichdir)
   1.286 +        {
   1.287 +        case 0:
   1.288 +            CHECK_BETTER(diag, tr - 2, tc - 2);
   1.289 +            break;
   1.290 +        case 1:
   1.291 +            CHECK_BETTER(diag, tr - 2, tc + 2);
   1.292 +            break;
   1.293 +        case 2:
   1.294 +            CHECK_BETTER(diag, tr + 2, tc - 2);
   1.295 +            break;
   1.296 +        case 3:
   1.297 +            CHECK_BETTER(diag, tr + 2, tc + 2);
   1.298 +            break;
   1.299 +        }
   1.300 +
   1.301 +        /* no reason to check the same one again: the best point did not move during this pass */
   1.302 +        if (tr == br && tc == bc)
   1.303 +            break;
   1.304 +
   1.305 +        tr = br;
   1.306 +        tc = bc;
   1.307 +    }
   1.308 +
   1.309 +    /* TODO: Each subsequent iteration checks at least one point in common
   1.310 +     * with the last iteration could be 2 ( if diag selected)
   1.311 +     */
   1.312 +
   1.313 +    /* 1/4 pel: same pattern as above with a step of 1, again at most 3 passes */
   1.314 +    while (--quarteriters)
   1.315 +    {
   1.316 +        CHECK_BETTER(left, tr, tc - 1);
   1.317 +        CHECK_BETTER(right, tr, tc + 1);
   1.318 +        CHECK_BETTER(up, tr - 1, tc);
   1.319 +        CHECK_BETTER(down, tr + 1, tc);
   1.320 +
   1.321 +        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
   1.322 +
   1.323 +        switch (whichdir)
   1.324 +        {
   1.325 +        case 0:
   1.326 +            CHECK_BETTER(diag, tr - 1, tc - 1);
   1.327 +            break;
   1.328 +        case 1:
   1.329 +            CHECK_BETTER(diag, tr - 1, tc + 1);
   1.330 +            break;
   1.331 +        case 2:
   1.332 +            CHECK_BETTER(diag, tr + 1, tc - 1);
   1.333 +            break;
   1.334 +        case 3:
   1.335 +            CHECK_BETTER(diag, tr + 1, tc + 1);
   1.336 +            break;
   1.337 +        }
   1.338 +
   1.339 +        /* no reason to check the same one again. */
   1.340 +        if (tr == br && tc == bc)
   1.341 +            break;
   1.342 +
   1.343 +        tr = br;
   1.344 +        tc = bc;
   1.345 +    }
   1.346 +    /* br,bc are the input scale times 4; doubling them gives the same times-8 scale as the central mv above. */
   1.347 +    bestmv->as_mv.row = br * 2;
   1.348 +    bestmv->as_mv.col = bc * 2;
   1.349 +    /* Reject a result whose distance from ref_mv exceeds MAX_FULL_PEL_VAL << 3 in either component. */
   1.350 +    if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
   1.351 +        (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
   1.352 +        return INT_MAX;
   1.353 +
   1.354 +    return besterr;
   1.355 +}
   1.356 +#undef MVC /* drop the helper macros that were defined ahead of this function */
   1.357 +#undef PRE
   1.358 +#undef SP
   1.359 +#undef DIST
   1.360 +#undef IFMVCV
   1.361 +#undef ERR
   1.362 +#undef CHECK_BETTER
   1.363 +
   1.364 +int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   1.365 +                                 int_mv *bestmv, int_mv *ref_mv,
   1.366 +                                 int error_per_bit,
   1.367 +                                 const vp8_variance_fn_ptr_t *vfp,
   1.368 +                                 int *mvcost[2], int *distortion,
   1.369 +                                 unsigned int *sse1)
   1.370 +{
   1.371 +    int bestmse = INT_MAX;
   1.372 +    int_mv startmv;
   1.373 +    int_mv this_mv;
   1.374 +    unsigned char *z = (*(b->base_src) + b->src);
   1.375 +    int left, right, up, down, diag;
   1.376 +    unsigned int sse;
   1.377 +    int whichdir ;
   1.378 +    int thismse;
   1.379 +    int y_stride;
   1.380 +    int pre_stride = x->e_mbd.pre.y_stride;
   1.381 +    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1.382 +
   1.383 +#if ARCH_X86 || ARCH_X86_64
   1.384 +    MACROBLOCKD *xd = &x->e_mbd;
   1.385 +    unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
   1.386 +    unsigned char *y;
   1.387 +
   1.388 +    y_stride = 32;
   1.389 +    /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
   1.390 +     vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
   1.391 +     y = xd->y_buf + y_stride + 1;
   1.392 +#else
   1.393 +     unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
   1.394 +     y_stride = pre_stride;
   1.395 +#endif
   1.396 +
   1.397 +    /* central mv */
   1.398 +    bestmv->as_mv.row <<= 3;
   1.399 +    bestmv->as_mv.col <<= 3;
   1.400 +    startmv = *bestmv;
   1.401 +
   1.402 +    /* calculate central point error */
   1.403 +    bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
   1.404 +    *distortion = bestmse;
   1.405 +    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
   1.406 +
   1.407 +    /* go left then right and check error */
   1.408 +    this_mv.as_mv.row = startmv.as_mv.row;
   1.409 +    this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
   1.410 +    thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
   1.411 +    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1.412 +
   1.413 +    if (left < bestmse)
   1.414 +    {
   1.415 +        *bestmv = this_mv;
   1.416 +        bestmse = left;
   1.417 +        *distortion = thismse;
   1.418 +        *sse1 = sse;
   1.419 +    }
   1.420 +
   1.421 +    this_mv.as_mv.col += 8;
   1.422 +    thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
   1.423 +    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1.424 +
   1.425 +    if (right < bestmse)
   1.426 +    {
   1.427 +        *bestmv = this_mv;
   1.428 +        bestmse = right;
   1.429 +        *distortion = thismse;
   1.430 +        *sse1 = sse;
   1.431 +    }
   1.432 +
   1.433 +    /* go up then down and check error */
   1.434 +    this_mv.as_mv.col = startmv.as_mv.col;
   1.435 +    this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
   1.436 +    thismse =  vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
   1.437 +    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1.438 +
   1.439 +    if (up < bestmse)
   1.440 +    {
   1.441 +        *bestmv = this_mv;
   1.442 +        bestmse = up;
   1.443 +        *distortion = thismse;
   1.444 +        *sse1 = sse;
   1.445 +    }
   1.446 +
   1.447 +    this_mv.as_mv.row += 8;
   1.448 +    thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
   1.449 +    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1.450 +
   1.451 +    if (down < bestmse)
   1.452 +    {
   1.453 +        *bestmv = this_mv;
   1.454 +        bestmse = down;
   1.455 +        *distortion = thismse;
   1.456 +        *sse1 = sse;
   1.457 +    }
   1.458 +
   1.459 +
   1.460 +    /* now check 1 more diagonal */
   1.461 +    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
   1.462 +    this_mv = startmv;
   1.463 +
   1.464 +    switch (whichdir)
   1.465 +    {
   1.466 +    case 0:
   1.467 +        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
   1.468 +        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
   1.469 +        thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
   1.470 +        break;
   1.471 +    case 1:
   1.472 +        this_mv.as_mv.col += 4;
   1.473 +        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
   1.474 +        thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
   1.475 +        break;
   1.476 +    case 2:
   1.477 +        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
   1.478 +        this_mv.as_mv.row += 4;
   1.479 +        thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
   1.480 +        break;
   1.481 +    case 3:
   1.482 +    default:
   1.483 +        this_mv.as_mv.col += 4;
   1.484 +        this_mv.as_mv.row += 4;
   1.485 +        thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
   1.486 +        break;
   1.487 +    }
   1.488 +
   1.489 +    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1.490 +
   1.491 +    if (diag < bestmse)
   1.492 +    {
   1.493 +        *bestmv = this_mv;
   1.494 +        bestmse = diag;
   1.495 +        *distortion = thismse;
   1.496 +        *sse1 = sse;
   1.497 +    }
   1.498 +
   1.499 +
   1.500 +    /* time to check quarter pels. */
   1.501 +    if (bestmv->as_mv.row < startmv.as_mv.row)
   1.502 +        y -= y_stride;
   1.503 +
   1.504 +    if (bestmv->as_mv.col < startmv.as_mv.col)
   1.505 +        y--;
   1.506 +
   1.507 +    startmv = *bestmv;
   1.508 +
   1.509 +
   1.510 +
   1.511 +    /* go left then right and check error */
   1.512 +    this_mv.as_mv.row = startmv.as_mv.row;
   1.513 +
   1.514 +    if (startmv.as_mv.col & 7)
   1.515 +    {
   1.516 +        this_mv.as_mv.col = startmv.as_mv.col - 2;
   1.517 +        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
   1.518 +    }
   1.519 +    else
   1.520 +    {
   1.521 +        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
   1.522 +        thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
   1.523 +    }
   1.524 +
   1.525 +    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1.526 +
   1.527 +    if (left < bestmse)
   1.528 +    {
   1.529 +        *bestmv = this_mv;
   1.530 +        bestmse = left;
   1.531 +        *distortion = thismse;
   1.532 +        *sse1 = sse;
   1.533 +    }
   1.534 +
   1.535 +    this_mv.as_mv.col += 4;
   1.536 +    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
   1.537 +    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1.538 +
   1.539 +    if (right < bestmse)
   1.540 +    {
   1.541 +        *bestmv = this_mv;
   1.542 +        bestmse = right;
   1.543 +        *distortion = thismse;
   1.544 +        *sse1 = sse;
   1.545 +    }
   1.546 +
   1.547 +    /* go up then down and check error */
   1.548 +    this_mv.as_mv.col = startmv.as_mv.col;
   1.549 +
   1.550 +    if (startmv.as_mv.row & 7)
   1.551 +    {
   1.552 +        this_mv.as_mv.row = startmv.as_mv.row - 2;
   1.553 +        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
   1.554 +    }
   1.555 +    else
   1.556 +    {
   1.557 +        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
   1.558 +        thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
   1.559 +    }
   1.560 +
   1.561 +    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1.562 +
   1.563 +    if (up < bestmse)
   1.564 +    {
   1.565 +        *bestmv = this_mv;
   1.566 +        bestmse = up;
   1.567 +        *distortion = thismse;
   1.568 +        *sse1 = sse;
   1.569 +    }
   1.570 +
   1.571 +    this_mv.as_mv.row += 4;
   1.572 +    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
   1.573 +    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1.574 +
   1.575 +    if (down < bestmse)
   1.576 +    {
   1.577 +        *bestmv = this_mv;
   1.578 +        bestmse = down;
   1.579 +        *distortion = thismse;
   1.580 +        *sse1 = sse;
   1.581 +    }
   1.582 +
   1.583 +
   1.584 +    /* now check 1 more diagonal */
   1.585 +    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
   1.586 +
   1.587 +    this_mv = startmv;
   1.588 +
   1.589 +    switch (whichdir)
   1.590 +    {
   1.591 +    case 0:
   1.592 +
   1.593 +        if (startmv.as_mv.row & 7)
   1.594 +        {
   1.595 +            this_mv.as_mv.row -= 2;
   1.596 +
   1.597 +            if (startmv.as_mv.col & 7)
   1.598 +            {
   1.599 +                this_mv.as_mv.col -= 2;
   1.600 +                thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
   1.601 +            }
   1.602 +            else
   1.603 +            {
   1.604 +                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
   1.605 +                thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);;
   1.606 +            }
   1.607 +        }
   1.608 +        else
   1.609 +        {
   1.610 +            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
   1.611 +
   1.612 +            if (startmv.as_mv.col & 7)
   1.613 +            {
   1.614 +                this_mv.as_mv.col -= 2;
   1.615 +                thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
   1.616 +            }
   1.617 +            else
   1.618 +            {
   1.619 +                this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
   1.620 +                thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride, &sse);
   1.621 +            }
   1.622 +        }
   1.623 +
   1.624 +        break;
   1.625 +    case 1:
   1.626 +        this_mv.as_mv.col += 2;
   1.627 +
   1.628 +        if (startmv.as_mv.row & 7)
   1.629 +        {
   1.630 +            this_mv.as_mv.row -= 2;
   1.631 +            thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
   1.632 +        }
   1.633 +        else
   1.634 +        {
   1.635 +            this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
   1.636 +            thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse);
   1.637 +        }
   1.638 +
   1.639 +        break;
   1.640 +    case 2:
   1.641 +        this_mv.as_mv.row += 2;
   1.642 +
   1.643 +        if (startmv.as_mv.col & 7)
   1.644 +        {
   1.645 +            this_mv.as_mv.col -= 2;
   1.646 +            thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
   1.647 +        }
   1.648 +        else
   1.649 +        {
   1.650 +            this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
   1.651 +            thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
   1.652 +        }
   1.653 +
   1.654 +        break;
   1.655 +    case 3:
   1.656 +        this_mv.as_mv.col += 2;
   1.657 +        this_mv.as_mv.row += 2;
   1.658 +        thismse = vfp->svf(y, y_stride,  this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse);
   1.659 +        break;
   1.660 +    }
   1.661 +
   1.662 +    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1.663 +
   1.664 +    if (diag < bestmse)
   1.665 +    {
   1.666 +        *bestmv = this_mv;
   1.667 +        bestmse = diag;
   1.668 +        *distortion = thismse;
   1.669 +        *sse1 = sse;
   1.670 +    }
   1.671 +
   1.672 +    return bestmse;
   1.673 +}
   1.674 +/* Half-pixel refinement of the full-pel vector in *bestmv: tests the left, right, up and down half-pel neighbours plus one diagonal. Updates *bestmv (scaled by 8), *distortion and *sse1; returns the best error including the mv cost. */
   1.675 +int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
   1.676 +                                  int_mv *bestmv, int_mv *ref_mv,
   1.677 +                                  int error_per_bit,
   1.678 +                                  const vp8_variance_fn_ptr_t *vfp,
   1.679 +                                  int *mvcost[2], int *distortion,
   1.680 +                                  unsigned int *sse1)
   1.681 +{
   1.682 +    int bestmse = INT_MAX;
   1.683 +    int_mv startmv;
   1.684 +    int_mv this_mv;
   1.685 +    unsigned char *z = (*(b->base_src) + b->src);
   1.686 +    int left, right, up, down, diag;
   1.687 +    unsigned int sse;
   1.688 +    int whichdir ;
   1.689 +    int thismse;
   1.690 +    int y_stride;
   1.691 +    int pre_stride = x->e_mbd.pre.y_stride;
   1.692 +    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1.693 +    /* x86 builds search a copy of the reference area held in xd->y_buf (stride 32); other builds read x->e_mbd.pre.y_buffer directly. */
   1.694 +#if ARCH_X86 || ARCH_X86_64
   1.695 +    MACROBLOCKD *xd = &x->e_mbd;
   1.696 +    unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
   1.697 +    unsigned char *y;
   1.698 +
   1.699 +    y_stride = 32;
   1.700 +    /* Copy 18 rows x 32 cols area to intermediate buffer before searching; the copy starts one row above and one column left of the bestmv position. */
   1.701 +    vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
   1.702 +    y = xd->y_buf + y_stride + 1;
   1.703 +#else
   1.704 +    unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col;
   1.705 +    y_stride = pre_stride;
   1.706 +#endif
   1.707 +
   1.708 +    /* central mv (the input vector scaled by 8); startmv keeps it as the reference point for the neighbours */
   1.709 +    bestmv->as_mv.row *= 8;
   1.710 +    bestmv->as_mv.col *= 8;
   1.711 +    startmv = *bestmv;
   1.712 +
   1.713 +    /* calculate central point error */
   1.714 +    bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
   1.715 +    *distortion = bestmse;
   1.716 +    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
   1.717 +
   1.718 +    /* go left then right and check error (col - 4 read from y - 1, then col + 4 read from y) */
   1.719 +    this_mv.as_mv.row = startmv.as_mv.row;
   1.720 +    this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
   1.721 +    thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse);
   1.722 +    left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1.723 +
   1.724 +    if (left < bestmse)
   1.725 +    {
   1.726 +        *bestmv = this_mv;
   1.727 +        bestmse = left;
   1.728 +        *distortion = thismse;
   1.729 +        *sse1 = sse;
   1.730 +    }
   1.731 +
   1.732 +    this_mv.as_mv.col += 8;
   1.733 +    thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse);
   1.734 +    right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1.735 +
   1.736 +    if (right < bestmse)
   1.737 +    {
   1.738 +        *bestmv = this_mv;
   1.739 +        bestmse = right;
   1.740 +        *distortion = thismse;
   1.741 +        *sse1 = sse;
   1.742 +    }
   1.743 +
   1.744 +    /* go up then down and check error (row - 4 read from y - y_stride, then row + 4 read from y) */
   1.745 +    this_mv.as_mv.col = startmv.as_mv.col;
   1.746 +    this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
   1.747 +    thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse);
   1.748 +    up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1.749 +
   1.750 +    if (up < bestmse)
   1.751 +    {
   1.752 +        *bestmv = this_mv;
   1.753 +        bestmse = up;
   1.754 +        *distortion = thismse;
   1.755 +        *sse1 = sse;
   1.756 +    }
   1.757 +
   1.758 +    this_mv.as_mv.row += 8;
   1.759 +    thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse);
   1.760 +    down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1.761 +
   1.762 +    if (down < bestmse)
   1.763 +    {
   1.764 +        *bestmv = this_mv;
   1.765 +        bestmse = down;
   1.766 +        *distortion = thismse;
   1.767 +        *sse1 = sse;
   1.768 +    }
   1.769 +
   1.770 +    /* now check 1 more diagonal - the one between the cheaper horizontal and the cheaper vertical neighbour */
   1.771 +    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
   1.772 +    this_mv = startmv;
   1.773 +
   1.774 +    switch (whichdir)
   1.775 +    {
   1.776 +    case 0:
   1.777 +        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
   1.778 +        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
   1.779 +        thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse);
   1.780 +        break;
   1.781 +    case 1:
   1.782 +        this_mv.as_mv.col += 4;
   1.783 +        this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
   1.784 +        thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse);
   1.785 +        break;
   1.786 +    case 2:
   1.787 +        this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
   1.788 +        this_mv.as_mv.row += 4;
   1.789 +        thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse);
   1.790 +        break;
   1.791 +    case 3:
   1.792 +    default:
   1.793 +        this_mv.as_mv.col += 4;
   1.794 +        this_mv.as_mv.row += 4;
   1.795 +        thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse);
   1.796 +        break;
   1.797 +    }
   1.798 +
   1.799 +    diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
   1.800 +
   1.801 +    if (diag < bestmse)
   1.802 +    {
   1.803 +        *bestmv = this_mv;
   1.804 +        bestmse = diag;
   1.805 +        *distortion = thismse;
   1.806 +        *sse1 = sse;
   1.807 +    }
   1.808 +
   1.809 +    return bestmse;
   1.810 +}
   1.811 +
   1.812 +#define CHECK_BOUNDS(range) \
   1.813 +{\
   1.814 +    all_in = 1;\
   1.815 +    all_in &= ((br-range) >= x->mv_row_min);\
   1.816 +    all_in &= ((br+range) <= x->mv_row_max);\
   1.817 +    all_in &= ((bc-range) >= x->mv_col_min);\
   1.818 +    all_in &= ((bc+range) <= x->mv_col_max);\
   1.819 +}
   1.820 +
   1.821 +#define CHECK_POINT \
   1.822 +{\
   1.823 +    if (this_mv.as_mv.col < x->mv_col_min) continue;\
   1.824 +    if (this_mv.as_mv.col > x->mv_col_max) continue;\
   1.825 +    if (this_mv.as_mv.row < x->mv_row_min) continue;\
   1.826 +    if (this_mv.as_mv.row > x->mv_row_max) continue;\
   1.827 +}
   1.828 +
   1.829 +#define CHECK_BETTER \
   1.830 +{\
   1.831 +    if (thissad < bestsad)\
   1.832 +    {\
   1.833 +        thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);\
   1.834 +        if (thissad < bestsad)\
   1.835 +        {\
   1.836 +            bestsad = thissad;\
   1.837 +            best_site = i;\
   1.838 +        }\
   1.839 +    }\
   1.840 +}
   1.841 +
   1.842 +static const MV next_chkpts[6][3] =
   1.843 +{
   1.844 +    {{ -2, 0}, { -1, -2}, {1, -2}},
   1.845 +    {{ -1, -2}, {1, -2}, {2, 0}},
   1.846 +    {{1, -2}, {2, 0}, {1, 2}},
   1.847 +    {{2, 0}, {1, 2}, { -1, 2}},
   1.848 +    {{1, 2}, { -1, 2}, { -2, 0}},
   1.849 +    {{ -1, 2}, { -2, 0}, { -1, -2}}
   1.850 +};
   1.851 +
   1.852 +int vp8_hex_search
   1.853 +(
   1.854 +    MACROBLOCK *x,
   1.855 +    BLOCK *b,
   1.856 +    BLOCKD *d,
   1.857 +    int_mv *ref_mv,
   1.858 +    int_mv *best_mv,
   1.859 +    int search_param,
   1.860 +    int sad_per_bit,
   1.861 +    const vp8_variance_fn_ptr_t *vfp,
   1.862 +    int *mvsadcost[2],
   1.863 +    int *mvcost[2],
   1.864 +    int_mv *center_mv
   1.865 +)
   1.866 +{
   1.867 +    MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
   1.868 +    MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}} ;
   1.869 +    int i, j;
   1.870 +
   1.871 +    unsigned char *what = (*(b->base_src) + b->src);
   1.872 +    int what_stride = b->src_stride;
   1.873 +    int pre_stride = x->e_mbd.pre.y_stride;
   1.874 +    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
   1.875 +
   1.876 +    int in_what_stride = pre_stride;
   1.877 +    int br, bc;
   1.878 +    int_mv this_mv;
   1.879 +    unsigned int bestsad;
   1.880 +    unsigned int thissad;
   1.881 +    unsigned char *base_offset;
   1.882 +    unsigned char *this_offset;
   1.883 +    int k = -1;
   1.884 +    int all_in;
   1.885 +    int best_site = -1;
   1.886 +    int hex_range = 127;
   1.887 +    int dia_range = 8;
   1.888 +
   1.889 +    int_mv fcenter_mv;
   1.890 +    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
   1.891 +    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
   1.892 +
   1.893 +    /* adjust ref_mv to make sure it is within MV range */
   1.894 +    vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   1.895 +    br = ref_mv->as_mv.row;
   1.896 +    bc = ref_mv->as_mv.col;
   1.897 +
   1.898 +    /* Work out the start point for the search */
   1.899 +    base_offset = (unsigned char *)(base_pre + d->offset);
   1.900 +    this_offset = base_offset + (br * (pre_stride)) + bc;
   1.901 +    this_mv.as_mv.row = br;
   1.902 +    this_mv.as_mv.col = bc;
   1.903 +    bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX)
   1.904 +            + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
   1.905 +
   1.906 +#if CONFIG_MULTI_RES_ENCODING
   1.907 +    /* Lower search range based on prediction info */
   1.908 +    if (search_param >= 6) goto cal_neighbors;
   1.909 +    else if (search_param >= 5) hex_range = 4;
   1.910 +    else if (search_param >= 4) hex_range = 6;
   1.911 +    else if (search_param >= 3) hex_range = 15;
   1.912 +    else if (search_param >= 2) hex_range = 31;
   1.913 +    else if (search_param >= 1) hex_range = 63;
   1.914 +
   1.915 +    dia_range = 8;
   1.916 +#endif
   1.917 +
   1.918 +    /* hex search */
   1.919 +    CHECK_BOUNDS(2)
   1.920 +
   1.921 +    if(all_in)
   1.922 +    {
   1.923 +        for (i = 0; i < 6; i++)
   1.924 +        {
   1.925 +            this_mv.as_mv.row = br + hex[i].row;
   1.926 +            this_mv.as_mv.col = bc + hex[i].col;
   1.927 +            this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
   1.928 +            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
   1.929 +            CHECK_BETTER
   1.930 +        }
   1.931 +    }else
   1.932 +    {
   1.933 +        for (i = 0; i < 6; i++)
   1.934 +        {
   1.935 +            this_mv.as_mv.row = br + hex[i].row;
   1.936 +            this_mv.as_mv.col = bc + hex[i].col;
   1.937 +            CHECK_POINT
   1.938 +            this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
   1.939 +            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
   1.940 +            CHECK_BETTER
   1.941 +        }
   1.942 +    }
   1.943 +
   1.944 +    if (best_site == -1)
   1.945 +        goto cal_neighbors;
   1.946 +    else
   1.947 +    {
   1.948 +        br += hex[best_site].row;
   1.949 +        bc += hex[best_site].col;
   1.950 +        k = best_site;
   1.951 +    }
   1.952 +
   1.953 +    for (j = 1; j < hex_range; j++)
   1.954 +    {
   1.955 +        best_site = -1;
   1.956 +        CHECK_BOUNDS(2)
   1.957 +
   1.958 +        if(all_in)
   1.959 +        {
   1.960 +            for (i = 0; i < 3; i++)
   1.961 +            {
   1.962 +                this_mv.as_mv.row = br + next_chkpts[k][i].row;
   1.963 +                this_mv.as_mv.col = bc + next_chkpts[k][i].col;
   1.964 +                this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
   1.965 +                thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
   1.966 +                CHECK_BETTER
   1.967 +            }
   1.968 +        }else
   1.969 +        {
   1.970 +            for (i = 0; i < 3; i++)
   1.971 +            {
   1.972 +                this_mv.as_mv.row = br + next_chkpts[k][i].row;
   1.973 +                this_mv.as_mv.col = bc + next_chkpts[k][i].col;
   1.974 +                CHECK_POINT
   1.975 +                this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
   1.976 +                thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
   1.977 +                CHECK_BETTER
   1.978 +            }
   1.979 +        }
   1.980 +
   1.981 +        if (best_site == -1)
   1.982 +            break;
   1.983 +        else
   1.984 +        {
   1.985 +            br += next_chkpts[k][best_site].row;
   1.986 +            bc += next_chkpts[k][best_site].col;
   1.987 +            k += 5 + best_site;
   1.988 +            if (k >= 12) k -= 12;
   1.989 +            else if (k >= 6) k -= 6;
   1.990 +        }
   1.991 +    }
   1.992 +
   1.993 +    /* check 4 1-away neighbors */
   1.994 +cal_neighbors:
   1.995 +    for (j = 0; j < dia_range; j++)
   1.996 +    {
   1.997 +        best_site = -1;
   1.998 +        CHECK_BOUNDS(1)
   1.999 +
  1.1000 +        if(all_in)
  1.1001 +        {
  1.1002 +            for (i = 0; i < 4; i++)
  1.1003 +            {
  1.1004 +                this_mv.as_mv.row = br + neighbors[i].row;
  1.1005 +                this_mv.as_mv.col = bc + neighbors[i].col;
  1.1006 +                this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
  1.1007 +                thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
  1.1008 +                CHECK_BETTER
  1.1009 +            }
  1.1010 +        }else
  1.1011 +        {
  1.1012 +            for (i = 0; i < 4; i++)
  1.1013 +            {
  1.1014 +                this_mv.as_mv.row = br + neighbors[i].row;
  1.1015 +                this_mv.as_mv.col = bc + neighbors[i].col;
  1.1016 +                CHECK_POINT
  1.1017 +                this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
  1.1018 +                thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
  1.1019 +                CHECK_BETTER
  1.1020 +            }
  1.1021 +        }
  1.1022 +
  1.1023 +        if (best_site == -1)
  1.1024 +            break;
  1.1025 +        else
  1.1026 +        {
  1.1027 +            br += neighbors[best_site].row;
  1.1028 +            bc += neighbors[best_site].col;
  1.1029 +        }
  1.1030 +    }
  1.1031 +
  1.1032 +    best_mv->as_mv.row = br;
  1.1033 +    best_mv->as_mv.col = bc;
  1.1034 +
  1.1035 +    return bestsad;
  1.1036 +}
  1.1037 +#undef CHECK_BOUNDS
  1.1038 +#undef CHECK_POINT
  1.1039 +#undef CHECK_BETTER
  1.1040 +
  1.1041 +int vp8_diamond_search_sad_c
  1.1042 +(
  1.1043 +    MACROBLOCK *x,
  1.1044 +    BLOCK *b,
  1.1045 +    BLOCKD *d,
  1.1046 +    int_mv *ref_mv,
  1.1047 +    int_mv *best_mv,
  1.1048 +    int search_param,
  1.1049 +    int sad_per_bit,
  1.1050 +    int *num00,
  1.1051 +    vp8_variance_fn_ptr_t *fn_ptr,
  1.1052 +    int *mvcost[2],
  1.1053 +    int_mv *center_mv
  1.1054 +)
  1.1055 +{
  1.1056 +    int i, j, step;
  1.1057 +
  1.1058 +    unsigned char *what = (*(b->base_src) + b->src);
  1.1059 +    int what_stride = b->src_stride;
  1.1060 +    unsigned char *in_what;
  1.1061 +    int pre_stride = x->e_mbd.pre.y_stride;
  1.1062 +    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  1.1063 +    int in_what_stride = pre_stride;
  1.1064 +    unsigned char *best_address;
  1.1065 +
  1.1066 +    int tot_steps;
  1.1067 +    int_mv this_mv;
  1.1068 +
  1.1069 +    unsigned int bestsad;
  1.1070 +    unsigned int thissad;
  1.1071 +    int best_site = 0;
  1.1072 +    int last_site = 0;
  1.1073 +
  1.1074 +    int ref_row;
  1.1075 +    int ref_col;
  1.1076 +    int this_row_offset;
  1.1077 +    int this_col_offset;
  1.1078 +    search_site *ss;
  1.1079 +
  1.1080 +    unsigned char *check_here;
  1.1081 +
  1.1082 +    int *mvsadcost[2];
  1.1083 +    int_mv fcenter_mv;
  1.1084 +
  1.1085 +    mvsadcost[0] = x->mvsadcost[0];
  1.1086 +    mvsadcost[1] = x->mvsadcost[1];
  1.1087 +    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1.1088 +    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1.1089 +
  1.1090 +    vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  1.1091 +    ref_row = ref_mv->as_mv.row;
  1.1092 +    ref_col = ref_mv->as_mv.col;
  1.1093 +    *num00 = 0;
  1.1094 +    best_mv->as_mv.row = ref_row;
  1.1095 +    best_mv->as_mv.col = ref_col;
  1.1096 +
  1.1097 +    /* Work out the start point for the search */
  1.1098 +    in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
  1.1099 +    best_address = in_what;
  1.1100 +
  1.1101 +    /* Check the starting position */
  1.1102 +    bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
  1.1103 +            + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
  1.1104 +
  1.1105 +    /* search_param determines the length of the initial step and hence
  1.1106 +     * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
  1.1107 +     * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
  1.1108 +     */
  1.1109 +    ss = &x->ss[search_param * x->searches_per_step];
  1.1110 +    tot_steps = (x->ss_count / x->searches_per_step) - search_param;
  1.1111 +
  1.1112 +    i = 1;
  1.1113 +
  1.1114 +    for (step = 0; step < tot_steps ; step++)
  1.1115 +    {
  1.1116 +        for (j = 0 ; j < x->searches_per_step ; j++)
  1.1117 +        {
  1.1118 +            /* Trap illegal vectors */
  1.1119 +            this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
  1.1120 +            this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
  1.1121 +
  1.1122 +            if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
  1.1123 +            (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
  1.1124 +
  1.1125 +            {
  1.1126 +                check_here = ss[i].offset + best_address;
  1.1127 +                thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
  1.1128 +
  1.1129 +                if (thissad < bestsad)
  1.1130 +                {
  1.1131 +                    this_mv.as_mv.row = this_row_offset;
  1.1132 +                    this_mv.as_mv.col = this_col_offset;
  1.1133 +                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
  1.1134 +                                              mvsadcost, sad_per_bit);
  1.1135 +
  1.1136 +                    if (thissad < bestsad)
  1.1137 +                    {
  1.1138 +                        bestsad = thissad;
  1.1139 +                        best_site = i;
  1.1140 +                    }
  1.1141 +                }
  1.1142 +            }
  1.1143 +
  1.1144 +            i++;
  1.1145 +        }
  1.1146 +
  1.1147 +        if (best_site != last_site)
  1.1148 +        {
  1.1149 +            best_mv->as_mv.row += ss[best_site].mv.row;
  1.1150 +            best_mv->as_mv.col += ss[best_site].mv.col;
  1.1151 +            best_address += ss[best_site].offset;
  1.1152 +            last_site = best_site;
  1.1153 +        }
  1.1154 +        else if (best_address == in_what)
  1.1155 +            (*num00)++;
  1.1156 +    }
  1.1157 +
  1.1158 +    this_mv.as_mv.row = best_mv->as_mv.row << 3;
  1.1159 +    this_mv.as_mv.col = best_mv->as_mv.col << 3;
  1.1160 +
  1.1161 +    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
  1.1162 +           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
  1.1163 +}
  1.1164 +
  1.1165 +int vp8_diamond_search_sadx4
  1.1166 +(
  1.1167 +    MACROBLOCK *x,
  1.1168 +    BLOCK *b,
  1.1169 +    BLOCKD *d,
  1.1170 +    int_mv *ref_mv,
  1.1171 +    int_mv *best_mv,
  1.1172 +    int search_param,
  1.1173 +    int sad_per_bit,
  1.1174 +    int *num00,
  1.1175 +    vp8_variance_fn_ptr_t *fn_ptr,
  1.1176 +    int *mvcost[2],
  1.1177 +    int_mv *center_mv
  1.1178 +)
  1.1179 +{
  1.1180 +    int i, j, step;
  1.1181 +
  1.1182 +    unsigned char *what = (*(b->base_src) + b->src);
  1.1183 +    int what_stride = b->src_stride;
  1.1184 +    unsigned char *in_what;
  1.1185 +    int pre_stride = x->e_mbd.pre.y_stride;
  1.1186 +    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  1.1187 +    int in_what_stride = pre_stride;
  1.1188 +    unsigned char *best_address;
  1.1189 +
  1.1190 +    int tot_steps;
  1.1191 +    int_mv this_mv;
  1.1192 +
  1.1193 +    unsigned int bestsad;
  1.1194 +    unsigned int thissad;
  1.1195 +    int best_site = 0;
  1.1196 +    int last_site = 0;
  1.1197 +
  1.1198 +    int ref_row;
  1.1199 +    int ref_col;
  1.1200 +    int this_row_offset;
  1.1201 +    int this_col_offset;
  1.1202 +    search_site *ss;
  1.1203 +
  1.1204 +    unsigned char *check_here;
  1.1205 +
  1.1206 +    int *mvsadcost[2];
  1.1207 +    int_mv fcenter_mv;
  1.1208 +
  1.1209 +    mvsadcost[0] = x->mvsadcost[0];
  1.1210 +    mvsadcost[1] = x->mvsadcost[1];
  1.1211 +    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1.1212 +    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1.1213 +
  1.1214 +    vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  1.1215 +    ref_row = ref_mv->as_mv.row;
  1.1216 +    ref_col = ref_mv->as_mv.col;
  1.1217 +    *num00 = 0;
  1.1218 +    best_mv->as_mv.row = ref_row;
  1.1219 +    best_mv->as_mv.col = ref_col;
  1.1220 +
  1.1221 +    /* Work out the start point for the search */
  1.1222 +    in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col);
  1.1223 +    best_address = in_what;
  1.1224 +
  1.1225 +    /* Check the starting position */
  1.1226 +    bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX)
  1.1227 +            + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
  1.1228 +
  1.1229 +    /* search_param determines the length of the initial step and hence the
  1.1230 +     * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 =
  1.1231 +     * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
  1.1232 +     */
  1.1233 +    ss = &x->ss[search_param * x->searches_per_step];
  1.1234 +    tot_steps = (x->ss_count / x->searches_per_step) - search_param;
  1.1235 +
  1.1236 +    i = 1;
  1.1237 +
  1.1238 +    for (step = 0; step < tot_steps ; step++)
  1.1239 +    {
  1.1240 +        int all_in = 1, t;
  1.1241 +
  1.1242 +        /* To know if all neighbor points are within the bounds, 4 bounds
  1.1243 +         * checking are enough instead of checking 4 bounds for each
  1.1244 +         * points.
  1.1245 +         */
  1.1246 +        all_in &= ((best_mv->as_mv.row + ss[i].mv.row)> x->mv_row_min);
  1.1247 +        all_in &= ((best_mv->as_mv.row + ss[i+1].mv.row) < x->mv_row_max);
  1.1248 +        all_in &= ((best_mv->as_mv.col + ss[i+2].mv.col) > x->mv_col_min);
  1.1249 +        all_in &= ((best_mv->as_mv.col + ss[i+3].mv.col) < x->mv_col_max);
  1.1250 +
  1.1251 +        if (all_in)
  1.1252 +        {
  1.1253 +            unsigned int sad_array[4];
  1.1254 +
  1.1255 +            for (j = 0 ; j < x->searches_per_step ; j += 4)
  1.1256 +            {
  1.1257 +                const unsigned char *block_offset[4];
  1.1258 +
  1.1259 +                for (t = 0; t < 4; t++)
  1.1260 +                    block_offset[t] = ss[i+t].offset + best_address;
  1.1261 +
  1.1262 +                fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
  1.1263 +
  1.1264 +                for (t = 0; t < 4; t++, i++)
  1.1265 +                {
  1.1266 +                    if (sad_array[t] < bestsad)
  1.1267 +                    {
  1.1268 +                        this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
  1.1269 +                        this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
  1.1270 +                        sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
  1.1271 +                                                       mvsadcost, sad_per_bit);
  1.1272 +
  1.1273 +                        if (sad_array[t] < bestsad)
  1.1274 +                        {
  1.1275 +                            bestsad = sad_array[t];
  1.1276 +                            best_site = i;
  1.1277 +                        }
  1.1278 +                    }
  1.1279 +                }
  1.1280 +            }
  1.1281 +        }
  1.1282 +        else
  1.1283 +        {
  1.1284 +            for (j = 0 ; j < x->searches_per_step ; j++)
  1.1285 +            {
  1.1286 +                /* Trap illegal vectors */
  1.1287 +                this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
  1.1288 +                this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
  1.1289 +
  1.1290 +                if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
  1.1291 +                (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
  1.1292 +                {
  1.1293 +                    check_here = ss[i].offset + best_address;
  1.1294 +                    thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
  1.1295 +
  1.1296 +                    if (thissad < bestsad)
  1.1297 +                    {
  1.1298 +                        this_mv.as_mv.row = this_row_offset;
  1.1299 +                        this_mv.as_mv.col = this_col_offset;
  1.1300 +                        thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
  1.1301 +                                                  mvsadcost, sad_per_bit);
  1.1302 +
  1.1303 +                        if (thissad < bestsad)
  1.1304 +                        {
  1.1305 +                            bestsad = thissad;
  1.1306 +                            best_site = i;
  1.1307 +                        }
  1.1308 +                    }
  1.1309 +                }
  1.1310 +                i++;
  1.1311 +            }
  1.1312 +        }
  1.1313 +
  1.1314 +        if (best_site != last_site)
  1.1315 +        {
  1.1316 +            best_mv->as_mv.row += ss[best_site].mv.row;
  1.1317 +            best_mv->as_mv.col += ss[best_site].mv.col;
  1.1318 +            best_address += ss[best_site].offset;
  1.1319 +            last_site = best_site;
  1.1320 +        }
  1.1321 +        else if (best_address == in_what)
  1.1322 +            (*num00)++;
  1.1323 +    }
  1.1324 +
  1.1325 +    this_mv.as_mv.row = best_mv->as_mv.row * 8;
  1.1326 +    this_mv.as_mv.col = best_mv->as_mv.col * 8;
  1.1327 +
  1.1328 +    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
  1.1329 +           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
  1.1330 +}
  1.1331 +
  1.1332 +int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
  1.1333 +                        int sad_per_bit, int distance,
  1.1334 +                        vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
  1.1335 +                        int_mv *center_mv)
  1.1336 +{
  1.1337 +    unsigned char *what = (*(b->base_src) + b->src);
  1.1338 +    int what_stride = b->src_stride;
  1.1339 +    unsigned char *in_what;
  1.1340 +    int pre_stride = x->e_mbd.pre.y_stride;
  1.1341 +    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  1.1342 +    int in_what_stride = pre_stride;
  1.1343 +    int mv_stride = pre_stride;
  1.1344 +    unsigned char *bestaddress;
  1.1345 +    int_mv *best_mv = &d->bmi.mv;
  1.1346 +    int_mv this_mv;
  1.1347 +    unsigned int bestsad;
  1.1348 +    unsigned int thissad;
  1.1349 +    int r, c;
  1.1350 +
  1.1351 +    unsigned char *check_here;
  1.1352 +
  1.1353 +    int ref_row = ref_mv->as_mv.row;
  1.1354 +    int ref_col = ref_mv->as_mv.col;
  1.1355 +
  1.1356 +    int row_min = ref_row - distance;
  1.1357 +    int row_max = ref_row + distance;
  1.1358 +    int col_min = ref_col - distance;
  1.1359 +    int col_max = ref_col + distance;
  1.1360 +
  1.1361 +    int *mvsadcost[2];
  1.1362 +    int_mv fcenter_mv;
  1.1363 +
  1.1364 +    mvsadcost[0] = x->mvsadcost[0];
  1.1365 +    mvsadcost[1] = x->mvsadcost[1];
  1.1366 +    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1.1367 +    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1.1368 +
  1.1369 +    /* Work out the mid point for the search */
  1.1370 +    in_what = base_pre + d->offset;
  1.1371 +    bestaddress = in_what + (ref_row * pre_stride) + ref_col;
  1.1372 +
  1.1373 +    best_mv->as_mv.row = ref_row;
  1.1374 +    best_mv->as_mv.col = ref_col;
  1.1375 +
  1.1376 +    /* Baseline value at the centre */
  1.1377 +    bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
  1.1378 +                          in_what_stride, UINT_MAX)
  1.1379 +            + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
  1.1380 +
  1.1381 +    /* Apply further limits to prevent us looking using vectors that
  1.1382 +     * stretch beyiond the UMV border
  1.1383 +     */
  1.1384 +    if (col_min < x->mv_col_min)
  1.1385 +        col_min = x->mv_col_min;
  1.1386 +
  1.1387 +    if (col_max > x->mv_col_max)
  1.1388 +        col_max = x->mv_col_max;
  1.1389 +
  1.1390 +    if (row_min < x->mv_row_min)
  1.1391 +        row_min = x->mv_row_min;
  1.1392 +
  1.1393 +    if (row_max > x->mv_row_max)
  1.1394 +        row_max = x->mv_row_max;
  1.1395 +
  1.1396 +    for (r = row_min; r < row_max ; r++)
  1.1397 +    {
  1.1398 +        this_mv.as_mv.row = r;
  1.1399 +        check_here = r * mv_stride + in_what + col_min;
  1.1400 +
  1.1401 +        for (c = col_min; c < col_max; c++)
  1.1402 +        {
  1.1403 +            thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
  1.1404 +
  1.1405 +            this_mv.as_mv.col = c;
  1.1406 +            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
  1.1407 +                                      mvsadcost, sad_per_bit);
  1.1408 +
  1.1409 +            if (thissad < bestsad)
  1.1410 +            {
  1.1411 +                bestsad = thissad;
  1.1412 +                best_mv->as_mv.row = r;
  1.1413 +                best_mv->as_mv.col = c;
  1.1414 +                bestaddress = check_here;
  1.1415 +            }
  1.1416 +
  1.1417 +            check_here++;
  1.1418 +        }
  1.1419 +    }
  1.1420 +
  1.1421 +    this_mv.as_mv.row = best_mv->as_mv.row << 3;
  1.1422 +    this_mv.as_mv.col = best_mv->as_mv.col << 3;
  1.1423 +
  1.1424 +    return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
  1.1425 +           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
  1.1426 +}
  1.1427 +
  1.1428 +int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
  1.1429 +                          int sad_per_bit, int distance,
  1.1430 +                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
  1.1431 +                          int_mv *center_mv)
  1.1432 +{
  1.1433 +    unsigned char *what = (*(b->base_src) + b->src);
  1.1434 +    int what_stride = b->src_stride;
  1.1435 +    unsigned char *in_what;
  1.1436 +    int pre_stride = x->e_mbd.pre.y_stride;
  1.1437 +    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  1.1438 +    int in_what_stride = pre_stride;
  1.1439 +    int mv_stride = pre_stride;
  1.1440 +    unsigned char *bestaddress;
  1.1441 +    int_mv *best_mv = &d->bmi.mv;
  1.1442 +    int_mv this_mv;
  1.1443 +    unsigned int bestsad;
  1.1444 +    unsigned int thissad;
  1.1445 +    int r, c;
  1.1446 +
  1.1447 +    unsigned char *check_here;
  1.1448 +
  1.1449 +    int ref_row = ref_mv->as_mv.row;
  1.1450 +    int ref_col = ref_mv->as_mv.col;
  1.1451 +
  1.1452 +    int row_min = ref_row - distance;
  1.1453 +    int row_max = ref_row + distance;
  1.1454 +    int col_min = ref_col - distance;
  1.1455 +    int col_max = ref_col + distance;
  1.1456 +
  1.1457 +    unsigned int sad_array[3];
  1.1458 +
  1.1459 +    int *mvsadcost[2];
  1.1460 +    int_mv fcenter_mv;
  1.1461 +
  1.1462 +    mvsadcost[0] = x->mvsadcost[0];
  1.1463 +    mvsadcost[1] = x->mvsadcost[1];
  1.1464 +    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1.1465 +    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1.1466 +
  1.1467 +    /* Work out the mid point for the search */
  1.1468 +    in_what = base_pre + d->offset;
  1.1469 +    bestaddress = in_what + (ref_row * pre_stride) + ref_col;
  1.1470 +
  1.1471 +    best_mv->as_mv.row = ref_row;
  1.1472 +    best_mv->as_mv.col = ref_col;
  1.1473 +
  1.1474 +    /* Baseline value at the centre */
  1.1475 +    bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
  1.1476 +                          in_what_stride, UINT_MAX)
  1.1477 +            + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
  1.1478 +
  1.1479 +    /* Apply further limits to prevent us looking using vectors that stretch
  1.1480 +     * beyond the UMV border
  1.1481 +     */
  1.1482 +    if (col_min < x->mv_col_min)
  1.1483 +        col_min = x->mv_col_min;
  1.1484 +
  1.1485 +    if (col_max > x->mv_col_max)
  1.1486 +        col_max = x->mv_col_max;
  1.1487 +
  1.1488 +    if (row_min < x->mv_row_min)
  1.1489 +        row_min = x->mv_row_min;
  1.1490 +
  1.1491 +    if (row_max > x->mv_row_max)
  1.1492 +        row_max = x->mv_row_max;
  1.1493 +
  1.1494 +    for (r = row_min; r < row_max ; r++)
  1.1495 +    {
  1.1496 +        this_mv.as_mv.row = r;
  1.1497 +        check_here = r * mv_stride + in_what + col_min;
  1.1498 +        c = col_min;
  1.1499 +
  1.1500 +        while ((c + 2) < col_max)
  1.1501 +        {
  1.1502 +            int i;
  1.1503 +
  1.1504 +            fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
  1.1505 +
  1.1506 +            for (i = 0; i < 3; i++)
  1.1507 +            {
  1.1508 +                thissad = sad_array[i];
  1.1509 +
  1.1510 +                if (thissad < bestsad)
  1.1511 +                {
  1.1512 +                    this_mv.as_mv.col = c;
  1.1513 +                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
  1.1514 +                                              mvsadcost, sad_per_bit);
  1.1515 +
  1.1516 +                    if (thissad < bestsad)
  1.1517 +                    {
  1.1518 +                        bestsad = thissad;
  1.1519 +                        best_mv->as_mv.row = r;
  1.1520 +                        best_mv->as_mv.col = c;
  1.1521 +                        bestaddress = check_here;
  1.1522 +                    }
  1.1523 +                }
  1.1524 +
  1.1525 +                check_here++;
  1.1526 +                c++;
  1.1527 +            }
  1.1528 +        }
  1.1529 +
  1.1530 +        while (c < col_max)
  1.1531 +        {
  1.1532 +            thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
  1.1533 +
  1.1534 +            if (thissad < bestsad)
  1.1535 +            {
  1.1536 +                this_mv.as_mv.col = c;
  1.1537 +                thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
  1.1538 +                                          mvsadcost, sad_per_bit);
  1.1539 +
  1.1540 +                if (thissad < bestsad)
  1.1541 +                {
  1.1542 +                    bestsad = thissad;
  1.1543 +                    best_mv->as_mv.row = r;
  1.1544 +                    best_mv->as_mv.col = c;
  1.1545 +                    bestaddress = check_here;
  1.1546 +                }
  1.1547 +            }
  1.1548 +
  1.1549 +            check_here ++;
  1.1550 +            c ++;
  1.1551 +        }
  1.1552 +
  1.1553 +    }
  1.1554 +
  1.1555 +    this_mv.as_mv.row = best_mv->as_mv.row << 3;
  1.1556 +    this_mv.as_mv.col = best_mv->as_mv.col << 3;
  1.1557 +
  1.1558 +    return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
  1.1559 +           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
  1.1560 +}
  1.1561 +
  1.1562 +int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
  1.1563 +                          int sad_per_bit, int distance,
  1.1564 +                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
  1.1565 +                          int_mv *center_mv)
  1.1566 +{
  1.1567 +    unsigned char *what = (*(b->base_src) + b->src);
  1.1568 +    int what_stride = b->src_stride;
  1.1569 +    int pre_stride = x->e_mbd.pre.y_stride;
  1.1570 +    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  1.1571 +    unsigned char *in_what;
  1.1572 +    int in_what_stride = pre_stride;
  1.1573 +    int mv_stride = pre_stride;
  1.1574 +    unsigned char *bestaddress;
  1.1575 +    int_mv *best_mv = &d->bmi.mv;
  1.1576 +    int_mv this_mv;
  1.1577 +    unsigned int bestsad;
  1.1578 +    unsigned int thissad;
  1.1579 +    int r, c;
  1.1580 +
  1.1581 +    unsigned char *check_here;
  1.1582 +
  1.1583 +    int ref_row = ref_mv->as_mv.row;
  1.1584 +    int ref_col = ref_mv->as_mv.col;
  1.1585 +
  1.1586 +    int row_min = ref_row - distance;
  1.1587 +    int row_max = ref_row + distance;
  1.1588 +    int col_min = ref_col - distance;
  1.1589 +    int col_max = ref_col + distance;
  1.1590 +
  1.1591 +    DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
  1.1592 +    unsigned int sad_array[3];
  1.1593 +
  1.1594 +    int *mvsadcost[2];
  1.1595 +    int_mv fcenter_mv;
  1.1596 +
  1.1597 +    mvsadcost[0] = x->mvsadcost[0];
  1.1598 +    mvsadcost[1] = x->mvsadcost[1];
  1.1599 +    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1.1600 +    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1.1601 +
  1.1602 +    /* Work out the mid point for the search */
  1.1603 +    in_what = base_pre + d->offset;
  1.1604 +    bestaddress = in_what + (ref_row * pre_stride) + ref_col;
  1.1605 +
  1.1606 +    best_mv->as_mv.row = ref_row;
  1.1607 +    best_mv->as_mv.col = ref_col;
  1.1608 +
  1.1609 +    /* Baseline value at the centre */
  1.1610 +    bestsad = fn_ptr->sdf(what, what_stride,
  1.1611 +                          bestaddress, in_what_stride, UINT_MAX)
  1.1612 +            + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
  1.1613 +
  1.1614 +    /* Apply further limits to prevent us looking using vectors that stretch
  1.1615 +     * beyond the UMV border
  1.1616 +     */
  1.1617 +    if (col_min < x->mv_col_min)
  1.1618 +        col_min = x->mv_col_min;
  1.1619 +
  1.1620 +    if (col_max > x->mv_col_max)
  1.1621 +        col_max = x->mv_col_max;
  1.1622 +
  1.1623 +    if (row_min < x->mv_row_min)
  1.1624 +        row_min = x->mv_row_min;
  1.1625 +
  1.1626 +    if (row_max > x->mv_row_max)
  1.1627 +        row_max = x->mv_row_max;
  1.1628 +
  1.1629 +    for (r = row_min; r < row_max ; r++)
  1.1630 +    {
  1.1631 +        this_mv.as_mv.row = r;
  1.1632 +        check_here = r * mv_stride + in_what + col_min;
  1.1633 +        c = col_min;
  1.1634 +
  1.1635 +        while ((c + 7) < col_max)
  1.1636 +        {
  1.1637 +            int i;
  1.1638 +
  1.1639 +            fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
  1.1640 +
  1.1641 +            for (i = 0; i < 8; i++)
  1.1642 +            {
  1.1643 +                thissad = sad_array8[i];
  1.1644 +
  1.1645 +                if (thissad < bestsad)
  1.1646 +                {
  1.1647 +                    this_mv.as_mv.col = c;
  1.1648 +                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
  1.1649 +                                              mvsadcost, sad_per_bit);
  1.1650 +
  1.1651 +                    if (thissad < bestsad)
  1.1652 +                    {
  1.1653 +                        bestsad = thissad;
  1.1654 +                        best_mv->as_mv.row = r;
  1.1655 +                        best_mv->as_mv.col = c;
  1.1656 +                        bestaddress = check_here;
  1.1657 +                    }
  1.1658 +                }
  1.1659 +
  1.1660 +                check_here++;
  1.1661 +                c++;
  1.1662 +            }
  1.1663 +        }
  1.1664 +
  1.1665 +        while ((c + 2) < col_max)
  1.1666 +        {
  1.1667 +            int i;
  1.1668 +
  1.1669 +            fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
  1.1670 +
  1.1671 +            for (i = 0; i < 3; i++)
  1.1672 +            {
  1.1673 +                thissad = sad_array[i];
  1.1674 +
  1.1675 +                if (thissad < bestsad)
  1.1676 +                {
  1.1677 +                    this_mv.as_mv.col = c;
  1.1678 +                    thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
  1.1679 +                        mvsadcost, sad_per_bit);
  1.1680 +
  1.1681 +                    if (thissad < bestsad)
  1.1682 +                    {
  1.1683 +                        bestsad = thissad;
  1.1684 +                        best_mv->as_mv.row = r;
  1.1685 +                        best_mv->as_mv.col = c;
  1.1686 +                        bestaddress = check_here;
  1.1687 +                    }
  1.1688 +                }
  1.1689 +
  1.1690 +                check_here++;
  1.1691 +                c++;
  1.1692 +            }
  1.1693 +        }
  1.1694 +
  1.1695 +        while (c < col_max)
  1.1696 +        {
  1.1697 +            thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
  1.1698 +
  1.1699 +            if (thissad < bestsad)
  1.1700 +            {
  1.1701 +                this_mv.as_mv.col = c;
  1.1702 +                thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
  1.1703 +                    mvsadcost, sad_per_bit);
  1.1704 +
  1.1705 +                if (thissad < bestsad)
  1.1706 +                {
  1.1707 +                    bestsad = thissad;
  1.1708 +                    best_mv->as_mv.row = r;
  1.1709 +                    best_mv->as_mv.col = c;
  1.1710 +                    bestaddress = check_here;
  1.1711 +                }
  1.1712 +            }
  1.1713 +
  1.1714 +            check_here ++;
  1.1715 +            c ++;
  1.1716 +        }
  1.1717 +    }
  1.1718 +
  1.1719 +    this_mv.as_mv.row = best_mv->as_mv.row * 8;
  1.1720 +    this_mv.as_mv.col = best_mv->as_mv.col * 8;
  1.1721 +
  1.1722 +    return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
  1.1723 +           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
  1.1724 +}
  1.1725 +
  1.1726 +int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
  1.1727 +                            int error_per_bit, int search_range,
  1.1728 +                            vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
  1.1729 +                            int_mv *center_mv)
  1.1730 +{
  1.1731 +    MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
  1.1732 +    int i, j;
  1.1733 +    short this_row_offset, this_col_offset;
  1.1734 +
  1.1735 +    int what_stride = b->src_stride;
  1.1736 +    int pre_stride = x->e_mbd.pre.y_stride;
  1.1737 +    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  1.1738 +    int in_what_stride = pre_stride;
  1.1739 +    unsigned char *what = (*(b->base_src) + b->src);
  1.1740 +    unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
  1.1741 +        (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
  1.1742 +    unsigned char *check_here;
  1.1743 +    int_mv this_mv;
  1.1744 +    unsigned int bestsad;
  1.1745 +    unsigned int thissad;
  1.1746 +
  1.1747 +    int *mvsadcost[2];
  1.1748 +    int_mv fcenter_mv;
  1.1749 +
  1.1750 +    mvsadcost[0] = x->mvsadcost[0];
  1.1751 +    mvsadcost[1] = x->mvsadcost[1];
  1.1752 +    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1.1753 +    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1.1754 +
  1.1755 +    bestsad = fn_ptr->sdf(what, what_stride, best_address,
  1.1756 +                          in_what_stride, UINT_MAX)
  1.1757 +            + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
  1.1758 +
  1.1759 +    for (i=0; i<search_range; i++)
  1.1760 +    {
  1.1761 +        int best_site = -1;
  1.1762 +
  1.1763 +        for (j = 0 ; j < 4 ; j++)
  1.1764 +        {
  1.1765 +            this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
  1.1766 +            this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
  1.1767 +
  1.1768 +            if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
  1.1769 +            (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
  1.1770 +            {
  1.1771 +                check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
  1.1772 +                thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
  1.1773 +
  1.1774 +                if (thissad < bestsad)
  1.1775 +                {
  1.1776 +                    this_mv.as_mv.row = this_row_offset;
  1.1777 +                    this_mv.as_mv.col = this_col_offset;
  1.1778 +                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
  1.1779 +
  1.1780 +                    if (thissad < bestsad)
  1.1781 +                    {
  1.1782 +                        bestsad = thissad;
  1.1783 +                        best_site = j;
  1.1784 +                    }
  1.1785 +                }
  1.1786 +            }
  1.1787 +        }
  1.1788 +
  1.1789 +        if (best_site == -1)
  1.1790 +            break;
  1.1791 +        else
  1.1792 +        {
  1.1793 +            ref_mv->as_mv.row += neighbors[best_site].row;
  1.1794 +            ref_mv->as_mv.col += neighbors[best_site].col;
  1.1795 +            best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
  1.1796 +        }
  1.1797 +    }
  1.1798 +
  1.1799 +    this_mv.as_mv.row = ref_mv->as_mv.row << 3;
  1.1800 +    this_mv.as_mv.col = ref_mv->as_mv.col << 3;
  1.1801 +
  1.1802 +    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
  1.1803 +           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
  1.1804 +}
  1.1805 +
  1.1806 +int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
  1.1807 +                              int_mv *ref_mv, int error_per_bit,
  1.1808 +                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
  1.1809 +                              int *mvcost[2], int_mv *center_mv)
  1.1810 +{
  1.1811 +    MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}};
  1.1812 +    int i, j;
  1.1813 +    short this_row_offset, this_col_offset;
  1.1814 +
  1.1815 +    int what_stride = b->src_stride;
  1.1816 +    int pre_stride = x->e_mbd.pre.y_stride;
  1.1817 +    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  1.1818 +    int in_what_stride = pre_stride;
  1.1819 +    unsigned char *what = (*(b->base_src) + b->src);
  1.1820 +    unsigned char *best_address = (unsigned char *)(base_pre + d->offset +
  1.1821 +        (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
  1.1822 +    unsigned char *check_here;
  1.1823 +    int_mv this_mv;
  1.1824 +    unsigned int bestsad;
  1.1825 +    unsigned int thissad;
  1.1826 +
  1.1827 +    int *mvsadcost[2];
  1.1828 +    int_mv fcenter_mv;
  1.1829 +
  1.1830 +    mvsadcost[0] = x->mvsadcost[0];
  1.1831 +    mvsadcost[1] = x->mvsadcost[1];
  1.1832 +    fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1.1833 +    fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1.1834 +
  1.1835 +    bestsad = fn_ptr->sdf(what, what_stride, best_address,
  1.1836 +                          in_what_stride, UINT_MAX)
  1.1837 +            + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
  1.1838 +
  1.1839 +    for (i=0; i<search_range; i++)
  1.1840 +    {
  1.1841 +        int best_site = -1;
  1.1842 +        int all_in = 1;
  1.1843 +
  1.1844 +        all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
  1.1845 +        all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
  1.1846 +        all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
  1.1847 +        all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);
  1.1848 +
  1.1849 +        if(all_in)
  1.1850 +        {
  1.1851 +            unsigned int sad_array[4];
  1.1852 +            const unsigned char *block_offset[4];
  1.1853 +            block_offset[0] = best_address - in_what_stride;
  1.1854 +            block_offset[1] = best_address - 1;
  1.1855 +            block_offset[2] = best_address + 1;
  1.1856 +            block_offset[3] = best_address + in_what_stride;
  1.1857 +
  1.1858 +            fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
  1.1859 +
  1.1860 +            for (j = 0; j < 4; j++)
  1.1861 +            {
  1.1862 +                if (sad_array[j] < bestsad)
  1.1863 +                {
  1.1864 +                    this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
  1.1865 +                    this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
  1.1866 +                    sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
  1.1867 +
  1.1868 +                    if (sad_array[j] < bestsad)
  1.1869 +                    {
  1.1870 +                        bestsad = sad_array[j];
  1.1871 +                        best_site = j;
  1.1872 +                    }
  1.1873 +                }
  1.1874 +            }
  1.1875 +        }
  1.1876 +        else
  1.1877 +        {
  1.1878 +            for (j = 0 ; j < 4 ; j++)
  1.1879 +            {
  1.1880 +                this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
  1.1881 +                this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
  1.1882 +
  1.1883 +                if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
  1.1884 +                (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
  1.1885 +                {
  1.1886 +                    check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
  1.1887 +                    thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
  1.1888 +
  1.1889 +                    if (thissad < bestsad)
  1.1890 +                    {
  1.1891 +                        this_mv.as_mv.row = this_row_offset;
  1.1892 +                        this_mv.as_mv.col = this_col_offset;
  1.1893 +                        thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
  1.1894 +
  1.1895 +                        if (thissad < bestsad)
  1.1896 +                        {
  1.1897 +                            bestsad = thissad;
  1.1898 +                            best_site = j;
  1.1899 +                        }
  1.1900 +                    }
  1.1901 +                }
  1.1902 +            }
  1.1903 +        }
  1.1904 +
  1.1905 +        if (best_site == -1)
  1.1906 +            break;
  1.1907 +        else
  1.1908 +        {
  1.1909 +            ref_mv->as_mv.row += neighbors[best_site].row;
  1.1910 +            ref_mv->as_mv.col += neighbors[best_site].col;
  1.1911 +            best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col;
  1.1912 +        }
  1.1913 +    }
  1.1914 +
  1.1915 +    this_mv.as_mv.row = ref_mv->as_mv.row * 8;
  1.1916 +    this_mv.as_mv.col = ref_mv->as_mv.col * 8;
  1.1917 +
  1.1918 +    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
  1.1919 +           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
  1.1920 +}
  1.1921 +
  1.1922 +#ifdef VP8_ENTROPY_STATS
  1.1923 +void print_mode_context(void)
  1.1924 +{
  1.1925 +    FILE *f = fopen("modecont.c", "w");
  1.1926 +    int i, j;
  1.1927 +
  1.1928 +    fprintf(f, "#include \"entropy.h\"\n");
  1.1929 +    fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
  1.1930 +    fprintf(f, "{\n");
  1.1931 +
  1.1932 +    for (j = 0; j < 6; j++)
  1.1933 +    {
  1.1934 +        fprintf(f, "  { /* %d */\n", j);
  1.1935 +        fprintf(f, "    ");
  1.1936 +
  1.1937 +        for (i = 0; i < 4; i++)
  1.1938 +        {
  1.1939 +            int overal_prob;
  1.1940 +            int this_prob;
  1.1941 +            int count;
  1.1942 +
  1.1943 +            /* Overall probs */
  1.1944 +            count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
  1.1945 +
  1.1946 +            if (count)
  1.1947 +                overal_prob = 256 * mv_mode_cts[i][0] / count;
  1.1948 +            else
  1.1949 +                overal_prob = 128;
  1.1950 +
  1.1951 +            if (overal_prob == 0)
  1.1952 +                overal_prob = 1;
  1.1953 +
  1.1954 +            /* context probs */
  1.1955 +            count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
  1.1956 +
  1.1957 +            if (count)
  1.1958 +                this_prob = 256 * mv_ref_ct[j][i][0] / count;
  1.1959 +            else
  1.1960 +                this_prob = 128;
  1.1961 +
  1.1962 +            if (this_prob == 0)
  1.1963 +                this_prob = 1;
  1.1964 +
  1.1965 +            fprintf(f, "%5d, ", this_prob);
  1.1966 +        }
  1.1967 +
  1.1968 +        fprintf(f, "  },\n");
  1.1969 +    }
  1.1970 +
  1.1971 +    fprintf(f, "};\n");
  1.1972 +    fclose(f);
  1.1973 +}
  1.1974 +
  1.1975 +/* MV ref count VP8_ENTROPY_STATS stats code */
  1.1976 +#ifdef VP8_ENTROPY_STATS
  1.1977 +void init_mv_ref_counts()
  1.1978 +{
  1.1979 +    vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
  1.1980 +    vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
  1.1981 +}
  1.1982 +
  1.1983 +void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
  1.1984 +{
  1.1985 +    if (m == ZEROMV)
  1.1986 +    {
  1.1987 +        ++mv_ref_ct [ct[0]] [0] [0];
  1.1988 +        ++mv_mode_cts[0][0];
  1.1989 +    }
  1.1990 +    else
  1.1991 +    {
  1.1992 +        ++mv_ref_ct [ct[0]] [0] [1];
  1.1993 +        ++mv_mode_cts[0][1];
  1.1994 +
  1.1995 +        if (m == NEARESTMV)
  1.1996 +        {
  1.1997 +            ++mv_ref_ct [ct[1]] [1] [0];
  1.1998 +            ++mv_mode_cts[1][0];
  1.1999 +        }
  1.2000 +        else
  1.2001 +        {
  1.2002 +            ++mv_ref_ct [ct[1]] [1] [1];
  1.2003 +            ++mv_mode_cts[1][1];
  1.2004 +
  1.2005 +            if (m == NEARMV)
  1.2006 +            {
  1.2007 +                ++mv_ref_ct [ct[2]] [2] [0];
  1.2008 +                ++mv_mode_cts[2][0];
  1.2009 +            }
  1.2010 +            else
  1.2011 +            {
  1.2012 +                ++mv_ref_ct [ct[2]] [2] [1];
  1.2013 +                ++mv_mode_cts[2][1];
  1.2014 +
  1.2015 +                if (m == NEWMV)
  1.2016 +                {
  1.2017 +                    ++mv_ref_ct [ct[3]] [3] [0];
  1.2018 +                    ++mv_mode_cts[3][0];
  1.2019 +                }
  1.2020 +                else
  1.2021 +                {
  1.2022 +                    ++mv_ref_ct [ct[3]] [3] [1];
  1.2023 +                    ++mv_mode_cts[3][1];
  1.2024 +                }
  1.2025 +            }
  1.2026 +        }
  1.2027 +    }
  1.2028 +}
  1.2029 +
  1.2030 +#endif/* END MV ref count VP8_ENTROPY_STATS stats code */
  1.2031 +
  1.2032 +#endif

mercurial