media/libvpx/vp8/encoder/rdopt.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/media/libvpx/vp8/encoder/rdopt.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,2642 @@
     1.4 +/*
     1.5 + *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     1.6 + *
     1.7 + *  Use of this source code is governed by a BSD-style license
     1.8 + *  that can be found in the LICENSE file in the root of the source
     1.9 + *  tree. An additional intellectual property rights grant can be found
    1.10 + *  in the file PATENTS.  All contributing project authors may
    1.11 + *  be found in the AUTHORS file in the root of the source tree.
    1.12 + */
    1.13 +
    1.14 +
    1.15 +#include <stdio.h>
    1.16 +#include <math.h>
    1.17 +#include <limits.h>
    1.18 +#include <assert.h>
    1.19 +#include "vpx_config.h"
    1.20 +#include "vp8_rtcd.h"
    1.21 +#include "vp8/common/pragmas.h"
    1.22 +#include "tokenize.h"
    1.23 +#include "treewriter.h"
    1.24 +#include "onyx_int.h"
    1.25 +#include "modecosts.h"
    1.26 +#include "encodeintra.h"
    1.27 +#include "pickinter.h"
    1.28 +#include "vp8/common/entropymode.h"
    1.29 +#include "vp8/common/reconinter.h"
    1.30 +#include "vp8/common/reconintra4x4.h"
    1.31 +#include "vp8/common/findnearmv.h"
    1.32 +#include "vp8/common/quant_common.h"
    1.33 +#include "encodemb.h"
    1.34 +#include "quantize.h"
    1.35 +#include "vp8/common/variance.h"
    1.36 +#include "mcomp.h"
    1.37 +#include "rdopt.h"
    1.38 +#include "vpx_mem/vpx_mem.h"
    1.39 +#include "vp8/common/systemdependent.h"
    1.40 +#if CONFIG_TEMPORAL_DENOISING
    1.41 +#include "denoising.h"
    1.42 +#endif
/* Prototype for a function whose definition is not in the visible part of
 * this file; it is declared here directly rather than through a header.
 */
extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x);

/* Larger of a and b.  This is a function-like macro, so the selected
 * argument is evaluated twice: do not pass expressions with side effects.
 */
#define MAXF(a,b)            (((a) > (b)) ? (a) : (b))
    1.46 +
/* Bundle of integer rate and distortion totals.
 * NOTE(review): no user of this type is visible in this part of the file;
 * the per-field notes below are inferred from the names only - confirm
 * against the code that fills the struct in.
 */
typedef struct rate_distortion_struct
{
    int rate2;          /* presumably the combined rate - TODO confirm */
    int rate_y;         /* presumably the luma (Y) rate - TODO confirm */
    int rate_uv;        /* presumably the chroma (U/V) rate - TODO confirm */
    int distortion2;    /* presumably the combined distortion - TODO confirm */
    int distortion_uv;  /* presumably the chroma distortion - TODO confirm */
} RATE_DISTORTION;
    1.55 +
/* Snapshot of a candidate mode decision: three integer RD scores plus the
 * macroblock mode info, the 16 per-block mode entries and the partition
 * info that go with it.
 * NOTE(review): no user of this type is visible in this part of the file;
 * which score is compared against which is not shown here - confirm.
 */
typedef struct best_mode_struct
{
  int yrd;
  int rd;
  int intra_rd;
  MB_MODE_INFO mbmode;
  union b_mode_info bmodes[16];   /* one entry per 4x4 block index 0..15 */
  PARTITION_INFO partition;
} BEST_MODE;
    1.65 +
    1.66 +static const int auto_speed_thresh[17] =
    1.67 +{
    1.68 +    1000,
    1.69 +    200,
    1.70 +    150,
    1.71 +    130,
    1.72 +    150,
    1.73 +    125,
    1.74 +    120,
    1.75 +    115,
    1.76 +    115,
    1.77 +    115,
    1.78 +    115,
    1.79 +    115,
    1.80 +    115,
    1.81 +    115,
    1.82 +    115,
    1.83 +    115,
    1.84 +    105
    1.85 +};
    1.86 +
    1.87 +const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES] =
    1.88 +{
    1.89 +    ZEROMV,
    1.90 +    DC_PRED,
    1.91 +
    1.92 +    NEARESTMV,
    1.93 +    NEARMV,
    1.94 +
    1.95 +    ZEROMV,
    1.96 +    NEARESTMV,
    1.97 +
    1.98 +    ZEROMV,
    1.99 +    NEARESTMV,
   1.100 +
   1.101 +    NEARMV,
   1.102 +    NEARMV,
   1.103 +
   1.104 +    V_PRED,
   1.105 +    H_PRED,
   1.106 +    TM_PRED,
   1.107 +
   1.108 +    NEWMV,
   1.109 +    NEWMV,
   1.110 +    NEWMV,
   1.111 +
   1.112 +    SPLITMV,
   1.113 +    SPLITMV,
   1.114 +    SPLITMV,
   1.115 +
   1.116 +    B_PRED,
   1.117 +};
   1.118 +
   1.119 +/* This table determines the search order in reference frame priority order,
   1.120 + * which may not necessarily match INTRA,LAST,GOLDEN,ARF
   1.121 + */
   1.122 +const int vp8_ref_frame_order[MAX_MODES] =
   1.123 +{
   1.124 +    1,
   1.125 +    0,
   1.126 +
   1.127 +    1,
   1.128 +    1,
   1.129 +
   1.130 +    2,
   1.131 +    2,
   1.132 +
   1.133 +    3,
   1.134 +    3,
   1.135 +
   1.136 +    2,
   1.137 +    3,
   1.138 +
   1.139 +    0,
   1.140 +    0,
   1.141 +    0,
   1.142 +
   1.143 +    1,
   1.144 +    2,
   1.145 +    3,
   1.146 +
   1.147 +    1,
   1.148 +    2,
   1.149 +    3,
   1.150 +
   1.151 +    0,
   1.152 +};
   1.153 +
   1.154 +static void fill_token_costs(
   1.155 +    int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
   1.156 +    const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]
   1.157 +)
   1.158 +{
   1.159 +    int i, j, k;
   1.160 +
   1.161 +
   1.162 +    for (i = 0; i < BLOCK_TYPES; i++)
   1.163 +        for (j = 0; j < COEF_BANDS; j++)
   1.164 +            for (k = 0; k < PREV_COEF_CONTEXTS; k++)
   1.165 +
   1.166 +                /* check for pt=0 and band > 1 if block type 0
   1.167 +                 * and 0 if blocktype 1
   1.168 +                 */
   1.169 +                if (k == 0 && j > (i == 0))
   1.170 +                    vp8_cost_tokens2(c[i][j][k], p [i][j][k], vp8_coef_tree, 2);
   1.171 +                else
   1.172 +                    vp8_cost_tokens(c[i][j][k], p [i][j][k], vp8_coef_tree);
   1.173 +}
   1.174 +
   1.175 +static const int rd_iifactor[32] =
   1.176 +{
   1.177 +    4, 4, 3, 2, 1, 0, 0, 0,
   1.178 +    0, 0, 0, 0, 0, 0, 0, 0,
   1.179 +    0, 0, 0, 0, 0, 0, 0, 0,
   1.180 +    0, 0, 0, 0, 0, 0, 0, 0
   1.181 +};
   1.182 +
/* values are now correlated to quantizer */
/* Lookup table with one entry per quantizer index (QINDEX_RANGE entries).
 * vp8cx_initialize_me_consts() copies entry [QIndex] into
 * cpi->mb.sadperbit16.  Laid out eight entries per row, so a row starts at
 * quantizer index 8 * row.
 */
static const int sad_per_bit16lut[QINDEX_RANGE] =
{
    2,  2,  2,  2,  2,  2,  2,  2,
    2,  2,  2,  2,  2,  2,  2,  2,
    3,  3,  3,  3,  3,  3,  3,  3,
    3,  3,  3,  3,  3,  3,  4,  4,
    4,  4,  4,  4,  4,  4,  4,  4,
    4,  4,  5,  5,  5,  5,  5,  5,
    5,  5,  5,  5,  5,  5,  6,  6,
    6,  6,  6,  6,  6,  6,  6,  6,
    6,  6,  7,  7,  7,  7,  7,  7,
    7,  7,  7,  7,  7,  7,  8,  8,
    8,  8,  8,  8,  8,  8,  8,  8,
    8,  8,  9,  9,  9,  9,  9,  9,
    9,  9,  9,  9,  9,  9,  10, 10,
    10, 10, 10, 10, 10, 10, 11, 11,
    11, 11, 11, 11, 12, 12, 12, 12,
    12, 12, 13, 13, 13, 13, 14, 14
};
/* Lookup table with one entry per quantizer index (QINDEX_RANGE entries).
 * vp8cx_initialize_me_consts() copies entry [QIndex] into
 * cpi->mb.sadperbit4.  Laid out eight entries per row, so a row starts at
 * quantizer index 8 * row.
 */
static const int sad_per_bit4lut[QINDEX_RANGE] =
{
    2,  2,  2,  2,  2,  2,  3,  3,
    3,  3,  3,  3,  3,  3,  3,  3,
    3,  3,  3,  3,  4,  4,  4,  4,
    4,  4,  4,  4,  4,  4,  5,  5,
    5,  5,  5,  5,  6,  6,  6,  6,
    6,  6,  6,  6,  6,  6,  6,  6,
    7,  7,  7,  7,  7,  7,  7,  7,
    7,  7,  7,  7,  7,  8,  8,  8,
    8,  8,  9,  9,  9,  9,  9,  9,
    10, 10, 10, 10, 10, 10, 10, 10,
    11, 11, 11, 11, 11, 11, 11, 11,
    12, 12, 12, 12, 12, 12, 12, 12,
    13, 13, 13, 13, 13, 13, 13, 14,
    14, 14, 14, 14, 15, 15, 15, 15,
    16, 16, 16, 16, 17, 17, 17, 18,
    18, 18, 19, 19, 19, 20, 20, 20,
};
   1.222 +
   1.223 +void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex)
   1.224 +{
   1.225 +    cpi->mb.sadperbit16 =  sad_per_bit16lut[QIndex];
   1.226 +    cpi->mb.sadperbit4  =  sad_per_bit4lut[QIndex];
   1.227 +}
   1.228 +
   1.229 +void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue)
   1.230 +{
   1.231 +    int q;
   1.232 +    int i;
   1.233 +    double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
   1.234 +    double rdconst = 2.80;
   1.235 +
   1.236 +    vp8_clear_system_state();
   1.237 +
   1.238 +    /* Further tests required to see if optimum is different
   1.239 +     * for key frames, golden frames and arf frames.
   1.240 +     */
   1.241 +    cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));
   1.242 +
   1.243 +    /* Extend rate multiplier along side quantizer zbin increases */
   1.244 +    if (cpi->mb.zbin_over_quant  > 0)
   1.245 +    {
   1.246 +        double oq_factor;
   1.247 +        double modq;
   1.248 +
   1.249 +        /* Experimental code using the same basic equation as used for Q above
   1.250 +         * The units of cpi->mb.zbin_over_quant are 1/128 of Q bin size
   1.251 +         */
   1.252 +        oq_factor = 1.0 + ((double)0.0015625 * cpi->mb.zbin_over_quant);
   1.253 +        modq = (int)((double)capped_q * oq_factor);
   1.254 +        cpi->RDMULT = (int)(rdconst * (modq * modq));
   1.255 +    }
   1.256 +
   1.257 +    if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME))
   1.258 +    {
   1.259 +        if (cpi->twopass.next_iiratio > 31)
   1.260 +            cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
   1.261 +        else
   1.262 +            cpi->RDMULT +=
   1.263 +                (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
   1.264 +    }
   1.265 +
   1.266 +    cpi->mb.errorperbit = (cpi->RDMULT / 110);
   1.267 +    cpi->mb.errorperbit += (cpi->mb.errorperbit==0);
   1.268 +
   1.269 +    vp8_set_speed_features(cpi);
   1.270 +
   1.271 +    for (i = 0; i < MAX_MODES; i++)
   1.272 +    {
   1.273 +        x->mode_test_hit_counts[i] = 0;
   1.274 +    }
   1.275 +
   1.276 +    q = (int)pow(Qvalue, 1.25);
   1.277 +
   1.278 +    if (q < 8)
   1.279 +        q = 8;
   1.280 +
   1.281 +    if (cpi->RDMULT > 1000)
   1.282 +    {
   1.283 +        cpi->RDDIV = 1;
   1.284 +        cpi->RDMULT /= 100;
   1.285 +
   1.286 +        for (i = 0; i < MAX_MODES; i++)
   1.287 +        {
   1.288 +            if (cpi->sf.thresh_mult[i] < INT_MAX)
   1.289 +            {
   1.290 +                x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
   1.291 +            }
   1.292 +            else
   1.293 +            {
   1.294 +                x->rd_threshes[i] = INT_MAX;
   1.295 +            }
   1.296 +
   1.297 +            cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
   1.298 +        }
   1.299 +    }
   1.300 +    else
   1.301 +    {
   1.302 +        cpi->RDDIV = 100;
   1.303 +
   1.304 +        for (i = 0; i < MAX_MODES; i++)
   1.305 +        {
   1.306 +            if (cpi->sf.thresh_mult[i] < (INT_MAX / q))
   1.307 +            {
   1.308 +                x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
   1.309 +            }
   1.310 +            else
   1.311 +            {
   1.312 +                x->rd_threshes[i] = INT_MAX;
   1.313 +            }
   1.314 +
   1.315 +            cpi->rd_baseline_thresh[i] = x->rd_threshes[i];
   1.316 +        }
   1.317 +    }
   1.318 +
   1.319 +    {
   1.320 +      /* build token cost array for the type of frame we have now */
   1.321 +      FRAME_CONTEXT *l = &cpi->lfc_n;
   1.322 +
   1.323 +      if(cpi->common.refresh_alt_ref_frame)
   1.324 +          l = &cpi->lfc_a;
   1.325 +      else if(cpi->common.refresh_golden_frame)
   1.326 +          l = &cpi->lfc_g;
   1.327 +
   1.328 +      fill_token_costs(
   1.329 +          cpi->mb.token_costs,
   1.330 +          (const vp8_prob( *)[8][3][11]) l->coef_probs
   1.331 +      );
   1.332 +      /*
   1.333 +      fill_token_costs(
   1.334 +          cpi->mb.token_costs,
   1.335 +          (const vp8_prob( *)[8][3][11]) cpi->common.fc.coef_probs);
   1.336 +      */
   1.337 +
   1.338 +
   1.339 +      /* TODO make these mode costs depend on last,alt or gold too.  (jbb) */
   1.340 +      vp8_init_mode_costs(cpi);
   1.341 +    }
   1.342 +
   1.343 +}
   1.344 +
   1.345 +void vp8_auto_select_speed(VP8_COMP *cpi)
   1.346 +{
   1.347 +    int milliseconds_for_compress = (int)(1000000 / cpi->framerate);
   1.348 +
   1.349 +    milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;
   1.350 +
   1.351 +#if 0
   1.352 +
   1.353 +    if (0)
   1.354 +    {
   1.355 +        FILE *f;
   1.356 +
   1.357 +        f = fopen("speed.stt", "a");
   1.358 +        fprintf(f, " %8ld %10ld %10ld %10ld\n",
   1.359 +                cpi->common.current_video_frame, cpi->Speed, milliseconds_for_compress, cpi->avg_pick_mode_time);
   1.360 +        fclose(f);
   1.361 +    }
   1.362 +
   1.363 +#endif
   1.364 +
   1.365 +    if (cpi->avg_pick_mode_time < milliseconds_for_compress && (cpi->avg_encode_time - cpi->avg_pick_mode_time) < milliseconds_for_compress)
   1.366 +    {
   1.367 +        if (cpi->avg_pick_mode_time == 0)
   1.368 +        {
   1.369 +            cpi->Speed = 4;
   1.370 +        }
   1.371 +        else
   1.372 +        {
   1.373 +            if (milliseconds_for_compress * 100 < cpi->avg_encode_time * 95)
   1.374 +            {
   1.375 +                cpi->Speed          += 2;
   1.376 +                cpi->avg_pick_mode_time = 0;
   1.377 +                cpi->avg_encode_time = 0;
   1.378 +
   1.379 +                if (cpi->Speed > 16)
   1.380 +                {
   1.381 +                    cpi->Speed = 16;
   1.382 +                }
   1.383 +            }
   1.384 +
   1.385 +            if (milliseconds_for_compress * 100 > cpi->avg_encode_time * auto_speed_thresh[cpi->Speed])
   1.386 +            {
   1.387 +                cpi->Speed          -= 1;
   1.388 +                cpi->avg_pick_mode_time = 0;
   1.389 +                cpi->avg_encode_time = 0;
   1.390 +
   1.391 +                /* In real-time mode, cpi->speed is in [4, 16]. */
   1.392 +                if (cpi->Speed < 4)
   1.393 +                {
   1.394 +                    cpi->Speed = 4;
   1.395 +                }
   1.396 +            }
   1.397 +        }
   1.398 +    }
   1.399 +    else
   1.400 +    {
   1.401 +        cpi->Speed += 4;
   1.402 +
   1.403 +        if (cpi->Speed > 16)
   1.404 +            cpi->Speed = 16;
   1.405 +
   1.406 +
   1.407 +        cpi->avg_pick_mode_time = 0;
   1.408 +        cpi->avg_encode_time = 0;
   1.409 +    }
   1.410 +}
   1.411 +
   1.412 +int vp8_block_error_c(short *coeff, short *dqcoeff)
   1.413 +{
   1.414 +    int i;
   1.415 +    int error = 0;
   1.416 +
   1.417 +    for (i = 0; i < 16; i++)
   1.418 +    {
   1.419 +        int this_diff = coeff[i] - dqcoeff[i];
   1.420 +        error += this_diff * this_diff;
   1.421 +    }
   1.422 +
   1.423 +    return error;
   1.424 +}
   1.425 +
   1.426 +int vp8_mbblock_error_c(MACROBLOCK *mb, int dc)
   1.427 +{
   1.428 +    BLOCK  *be;
   1.429 +    BLOCKD *bd;
   1.430 +    int i, j;
   1.431 +    int berror, error = 0;
   1.432 +
   1.433 +    for (i = 0; i < 16; i++)
   1.434 +    {
   1.435 +        be = &mb->block[i];
   1.436 +        bd = &mb->e_mbd.block[i];
   1.437 +
   1.438 +        berror = 0;
   1.439 +
   1.440 +        for (j = dc; j < 16; j++)
   1.441 +        {
   1.442 +            int this_diff = be->coeff[j] - bd->dqcoeff[j];
   1.443 +            berror += this_diff * this_diff;
   1.444 +        }
   1.445 +
   1.446 +        error += berror;
   1.447 +    }
   1.448 +
   1.449 +    return error;
   1.450 +}
   1.451 +
   1.452 +int vp8_mbuverror_c(MACROBLOCK *mb)
   1.453 +{
   1.454 +
   1.455 +    BLOCK  *be;
   1.456 +    BLOCKD *bd;
   1.457 +
   1.458 +
   1.459 +    int i;
   1.460 +    int error = 0;
   1.461 +
   1.462 +    for (i = 16; i < 24; i++)
   1.463 +    {
   1.464 +        be = &mb->block[i];
   1.465 +        bd = &mb->e_mbd.block[i];
   1.466 +
   1.467 +        error += vp8_block_error_c(be->coeff, bd->dqcoeff);
   1.468 +    }
   1.469 +
   1.470 +    return error;
   1.471 +}
   1.472 +
   1.473 +int VP8_UVSSE(MACROBLOCK *x)
   1.474 +{
   1.475 +    unsigned char *uptr, *vptr;
   1.476 +    unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
   1.477 +    unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
   1.478 +    int uv_stride = x->block[16].src_stride;
   1.479 +
   1.480 +    unsigned int sse1 = 0;
   1.481 +    unsigned int sse2 = 0;
   1.482 +    int mv_row = x->e_mbd.mode_info_context->mbmi.mv.as_mv.row;
   1.483 +    int mv_col = x->e_mbd.mode_info_context->mbmi.mv.as_mv.col;
   1.484 +    int offset;
   1.485 +    int pre_stride = x->e_mbd.pre.uv_stride;
   1.486 +
   1.487 +    if (mv_row < 0)
   1.488 +        mv_row -= 1;
   1.489 +    else
   1.490 +        mv_row += 1;
   1.491 +
   1.492 +    if (mv_col < 0)
   1.493 +        mv_col -= 1;
   1.494 +    else
   1.495 +        mv_col += 1;
   1.496 +
   1.497 +    mv_row /= 2;
   1.498 +    mv_col /= 2;
   1.499 +
   1.500 +    offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
   1.501 +    uptr = x->e_mbd.pre.u_buffer + offset;
   1.502 +    vptr = x->e_mbd.pre.v_buffer + offset;
   1.503 +
   1.504 +    if ((mv_row | mv_col) & 7)
   1.505 +    {
   1.506 +        vp8_sub_pixel_variance8x8(uptr, pre_stride,
   1.507 +            mv_col & 7, mv_row & 7, upred_ptr, uv_stride, &sse2);
   1.508 +        vp8_sub_pixel_variance8x8(vptr, pre_stride,
   1.509 +            mv_col & 7, mv_row & 7, vpred_ptr, uv_stride, &sse1);
   1.510 +        sse2 += sse1;
   1.511 +    }
   1.512 +    else
   1.513 +    {
   1.514 +        vp8_variance8x8(uptr, pre_stride,
   1.515 +            upred_ptr, uv_stride, &sse2);
   1.516 +        vp8_variance8x8(vptr, pre_stride,
   1.517 +            vpred_ptr, uv_stride, &sse1);
   1.518 +        sse2 += sse1;
   1.519 +    }
   1.520 +    return sse2;
   1.521 +
   1.522 +}
   1.523 +
   1.524 +static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
   1.525 +{
   1.526 +    int c = !type;              /* start at coef 0, unless Y with Y2 */
   1.527 +    int eob = (int)(*b->eob);
   1.528 +    int pt ;    /* surrounding block/prev coef predictor */
   1.529 +    int cost = 0;
   1.530 +    short *qcoeff_ptr = b->qcoeff;
   1.531 +
   1.532 +    VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
   1.533 +
   1.534 +# define QC( I)  ( qcoeff_ptr [vp8_default_zig_zag1d[I]] )
   1.535 +
   1.536 +    for (; c < eob; c++)
   1.537 +    {
   1.538 +        int v = QC(c);
   1.539 +        int t = vp8_dct_value_tokens_ptr[v].Token;
   1.540 +        cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [t];
   1.541 +        cost += vp8_dct_value_cost_ptr[v];
   1.542 +        pt = vp8_prev_token_class[t];
   1.543 +    }
   1.544 +
   1.545 +# undef QC
   1.546 +
   1.547 +    if (c < 16)
   1.548 +        cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN];
   1.549 +
   1.550 +    pt = (c != !type); /* is eob first coefficient; */
   1.551 +    *a = *l = pt;
   1.552 +
   1.553 +    return cost;
   1.554 +}
   1.555 +
   1.556 +static int vp8_rdcost_mby(MACROBLOCK *mb)
   1.557 +{
   1.558 +    int cost = 0;
   1.559 +    int b;
   1.560 +    MACROBLOCKD *x = &mb->e_mbd;
   1.561 +    ENTROPY_CONTEXT_PLANES t_above, t_left;
   1.562 +    ENTROPY_CONTEXT *ta;
   1.563 +    ENTROPY_CONTEXT *tl;
   1.564 +
   1.565 +    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
   1.566 +    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
   1.567 +
   1.568 +    ta = (ENTROPY_CONTEXT *)&t_above;
   1.569 +    tl = (ENTROPY_CONTEXT *)&t_left;
   1.570 +
   1.571 +    for (b = 0; b < 16; b++)
   1.572 +        cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_NO_DC,
   1.573 +                    ta + vp8_block2above[b], tl + vp8_block2left[b]);
   1.574 +
   1.575 +    cost += cost_coeffs(mb, x->block + 24, PLANE_TYPE_Y2,
   1.576 +                ta + vp8_block2above[24], tl + vp8_block2left[24]);
   1.577 +
   1.578 +    return cost;
   1.579 +}
   1.580 +
   1.581 +static void macro_block_yrd( MACROBLOCK *mb,
   1.582 +                             int *Rate,
   1.583 +                             int *Distortion)
   1.584 +{
   1.585 +    int b;
   1.586 +    MACROBLOCKD *const x = &mb->e_mbd;
   1.587 +    BLOCK   *const mb_y2 = mb->block + 24;
   1.588 +    BLOCKD *const x_y2  = x->block + 24;
   1.589 +    short *Y2DCPtr = mb_y2->src_diff;
   1.590 +    BLOCK *beptr;
   1.591 +    int d;
   1.592 +
   1.593 +    vp8_subtract_mby( mb->src_diff, *(mb->block[0].base_src),
   1.594 +        mb->block[0].src_stride,  mb->e_mbd.predictor, 16);
   1.595 +
   1.596 +    /* Fdct and building the 2nd order block */
   1.597 +    for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
   1.598 +    {
   1.599 +        mb->short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
   1.600 +        *Y2DCPtr++ = beptr->coeff[0];
   1.601 +        *Y2DCPtr++ = beptr->coeff[16];
   1.602 +    }
   1.603 +
   1.604 +    /* 2nd order fdct */
   1.605 +    mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
   1.606 +
   1.607 +    /* Quantization */
   1.608 +    for (b = 0; b < 16; b++)
   1.609 +    {
   1.610 +        mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
   1.611 +    }
   1.612 +
   1.613 +    /* DC predication and Quantization of 2nd Order block */
   1.614 +    mb->quantize_b(mb_y2, x_y2);
   1.615 +
   1.616 +    /* Distortion */
   1.617 +    d = vp8_mbblock_error(mb, 1) << 2;
   1.618 +    d += vp8_block_error(mb_y2->coeff, x_y2->dqcoeff);
   1.619 +
   1.620 +    *Distortion = (d >> 4);
   1.621 +
   1.622 +    /* rate */
   1.623 +    *Rate = vp8_rdcost_mby(mb);
   1.624 +}
   1.625 +
   1.626 +static void copy_predictor(unsigned char *dst, const unsigned char *predictor)
   1.627 +{
   1.628 +    const unsigned int *p = (const unsigned int *)predictor;
   1.629 +    unsigned int *d = (unsigned int *)dst;
   1.630 +    d[0] = p[0];
   1.631 +    d[4] = p[4];
   1.632 +    d[8] = p[8];
   1.633 +    d[12] = p[12];
   1.634 +}
/* Choose the 4x4 intra prediction mode for one block.
 *
 * Every mode from B_DC_PRED through B_HU_PRED is predicted, subtracted,
 * transformed, quantized and scored with RDCOST; the mode with the lowest
 * score wins.
 *
 *   x, be, b        macroblock, encoder-side block and decoder-side block
 *   best_mode       out: the winning mode
 *   bmode_costs     cost of signalling each mode, indexed by mode
 *   a, l            in: entropy contexts used for costing;
 *                   out: the contexts left by the winning mode
 *   bestrate        out: mode cost + coefficient cost of the winner
 *   bestratey       out: coefficient cost of the winner
 *   bestdistortion  out: distortion of the winner
 *
 * Returns the winning RD score.  Also stores the winning mode in
 * b->bmi.as_mode and calls vp8_short_idct4x4llm() with the winner's
 * dequantized coefficients and predictor, targeting dst (presumably the
 * reconstruction of this block - confirm against the idct's definition).
 */
static int rd_pick_intra4x4block(
    MACROBLOCK *x,
    BLOCK *be,
    BLOCKD *b,
    B_PREDICTION_MODE *best_mode,
    const int *bmode_costs,
    ENTROPY_CONTEXT *a,
    ENTROPY_CONTEXT *l,

    int *bestrate,
    int *bestratey,
    int *bestdistortion)
{
    B_PREDICTION_MODE mode;
    int best_rd = INT_MAX;
    int rate = 0;
    int distortion;

    /* ta/tl keep the incoming contexts; tempa/templ are scratch copies
     * that cost_coeffs() overwrites for each candidate mode */
    ENTROPY_CONTEXT ta = *a, tempa = *a;
    ENTROPY_CONTEXT tl = *l, templ = *l;
    /*
     * The predictor buffer is a 2d buffer with a stride of 16.  Create
     * a temp buffer that meets the stride requirements, but we are only
     * interested in the left 4x4 block
     * */
    DECLARE_ALIGNED_ARRAY(16, unsigned char,  best_predictor, 16*4);
    DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16);
    int dst_stride = x->e_mbd.dst.y_stride;
    unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset;

    /* neighbouring samples in the destination frame: the row above, the
     * column to the left, and the sample above-left of the block */
    unsigned char *Above = dst - dst_stride;
    unsigned char *yleft = dst - 1;
    unsigned char top_left = Above[-1];

    for (mode = B_DC_PRED; mode <= B_HU_PRED; mode++)
    {
        int this_rd;
        int ratey;

        rate = bmode_costs[mode];

        vp8_intra4x4_predict(Above, yleft, dst_stride, mode,
                             b->predictor, 16, top_left);
        vp8_subtract_b(be, b, 16);
        x->short_fdct4x4(be->src_diff, be->coeff, 32);
        x->quantize_b(be, b);

        /* every candidate is costed from the same starting contexts */
        tempa = ta;
        templ = tl;

        ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ);
        rate += ratey;
        distortion = vp8_block_error(be->coeff, b->dqcoeff) >> 2;

        this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

        if (this_rd < best_rd)
        {
            *bestrate = rate;
            *bestratey = ratey;
            *bestdistortion = distortion;
            best_rd = this_rd;
            *best_mode = mode;
            *a = tempa;
            *l = templ;
            /* b->predictor and b->dqcoeff are overwritten by the next
             * candidate, so keep copies of the winner's data */
            copy_predictor(best_predictor, b->predictor);
            /* 32 bytes = 16 coefficients of type short */
            vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
        }
    }
    b->bmi.as_mode = *best_mode;

    vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride);

    return best_rd;
}
   1.710 +
   1.711 +static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate,
   1.712 +                                     int *rate_y, int *Distortion, int best_rd)
   1.713 +{
   1.714 +    MACROBLOCKD *const xd = &mb->e_mbd;
   1.715 +    int i;
   1.716 +    int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
   1.717 +    int distortion = 0;
   1.718 +    int tot_rate_y = 0;
   1.719 +    int64_t total_rd = 0;
   1.720 +    ENTROPY_CONTEXT_PLANES t_above, t_left;
   1.721 +    ENTROPY_CONTEXT *ta;
   1.722 +    ENTROPY_CONTEXT *tl;
   1.723 +    const int *bmode_costs;
   1.724 +
   1.725 +    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
   1.726 +    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
   1.727 +
   1.728 +    ta = (ENTROPY_CONTEXT *)&t_above;
   1.729 +    tl = (ENTROPY_CONTEXT *)&t_left;
   1.730 +
   1.731 +    intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16);
   1.732 +
   1.733 +    bmode_costs = mb->inter_bmode_costs;
   1.734 +
   1.735 +    for (i = 0; i < 16; i++)
   1.736 +    {
   1.737 +        MODE_INFO *const mic = xd->mode_info_context;
   1.738 +        const int mis = xd->mode_info_stride;
   1.739 +        B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
   1.740 +        int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d);
   1.741 +
   1.742 +        if (mb->e_mbd.frame_type == KEY_FRAME)
   1.743 +        {
   1.744 +            const B_PREDICTION_MODE A = above_block_mode(mic, i, mis);
   1.745 +            const B_PREDICTION_MODE L = left_block_mode(mic, i);
   1.746 +
   1.747 +            bmode_costs  = mb->bmode_costs[A][L];
   1.748 +        }
   1.749 +
   1.750 +        total_rd += rd_pick_intra4x4block(
   1.751 +            mb, mb->block + i, xd->block + i, &best_mode, bmode_costs,
   1.752 +            ta + vp8_block2above[i],
   1.753 +            tl + vp8_block2left[i], &r, &ry, &d);
   1.754 +
   1.755 +        cost += r;
   1.756 +        distortion += d;
   1.757 +        tot_rate_y += ry;
   1.758 +
   1.759 +        mic->bmi[i].as_mode = best_mode;
   1.760 +
   1.761 +        if(total_rd >= (int64_t)best_rd)
   1.762 +            break;
   1.763 +    }
   1.764 +
   1.765 +    if(total_rd >= (int64_t)best_rd)
   1.766 +        return INT_MAX;
   1.767 +
   1.768 +    *Rate = cost;
   1.769 +    *rate_y = tot_rate_y;
   1.770 +    *Distortion = distortion;
   1.771 +
   1.772 +    return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
   1.773 +}
   1.774 +
   1.775 +
   1.776 +static int rd_pick_intra16x16mby_mode(MACROBLOCK *x,
   1.777 +                                      int *Rate,
   1.778 +                                      int *rate_y,
   1.779 +                                      int *Distortion)
   1.780 +{
   1.781 +    MB_PREDICTION_MODE mode;
   1.782 +    MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
   1.783 +    int rate, ratey;
   1.784 +    int distortion;
   1.785 +    int best_rd = INT_MAX;
   1.786 +    int this_rd;
   1.787 +    MACROBLOCKD *xd = &x->e_mbd;
   1.788 +
   1.789 +    /* Y Search for 16x16 intra prediction mode */
   1.790 +    for (mode = DC_PRED; mode <= TM_PRED; mode++)
   1.791 +    {
   1.792 +        xd->mode_info_context->mbmi.mode = mode;
   1.793 +
   1.794 +        vp8_build_intra_predictors_mby_s(xd,
   1.795 +                                         xd->dst.y_buffer - xd->dst.y_stride,
   1.796 +                                         xd->dst.y_buffer - 1,
   1.797 +                                         xd->dst.y_stride,
   1.798 +                                         xd->predictor,
   1.799 +                                         16);
   1.800 +
   1.801 +        macro_block_yrd(x, &ratey, &distortion);
   1.802 +        rate = ratey + x->mbmode_cost[xd->frame_type]
   1.803 +                                     [xd->mode_info_context->mbmi.mode];
   1.804 +
   1.805 +        this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
   1.806 +
   1.807 +        if (this_rd < best_rd)
   1.808 +        {
   1.809 +            mode_selected = mode;
   1.810 +            best_rd = this_rd;
   1.811 +            *Rate = rate;
   1.812 +            *rate_y = ratey;
   1.813 +            *Distortion = distortion;
   1.814 +        }
   1.815 +    }
   1.816 +
   1.817 +    xd->mode_info_context->mbmi.mode = mode_selected;
   1.818 +    return best_rd;
   1.819 +}
   1.820 +
   1.821 +static int rd_cost_mbuv(MACROBLOCK *mb)
   1.822 +{
   1.823 +    int b;
   1.824 +    int cost = 0;
   1.825 +    MACROBLOCKD *x = &mb->e_mbd;
   1.826 +    ENTROPY_CONTEXT_PLANES t_above, t_left;
   1.827 +    ENTROPY_CONTEXT *ta;
   1.828 +    ENTROPY_CONTEXT *tl;
   1.829 +
   1.830 +    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
   1.831 +    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
   1.832 +
   1.833 +    ta = (ENTROPY_CONTEXT *)&t_above;
   1.834 +    tl = (ENTROPY_CONTEXT *)&t_left;
   1.835 +
   1.836 +    for (b = 16; b < 24; b++)
   1.837 +        cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_UV,
   1.838 +                    ta + vp8_block2above[b], tl + vp8_block2left[b]);
   1.839 +
   1.840 +    return cost;
   1.841 +}
   1.842 +
   1.843 +
   1.844 +static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
   1.845 +                            int *distortion, int fullpixel)
   1.846 +{
   1.847 +    vp8_build_inter16x16_predictors_mbuv(&x->e_mbd);
   1.848 +    vp8_subtract_mbuv(x->src_diff,
   1.849 +        x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
   1.850 +        &x->e_mbd.predictor[256], &x->e_mbd.predictor[320], 8);
   1.851 +
   1.852 +    vp8_transform_mbuv(x);
   1.853 +    vp8_quantize_mbuv(x);
   1.854 +
   1.855 +    *rate       = rd_cost_mbuv(x);
   1.856 +    *distortion = vp8_mbuverror(x) / 4;
   1.857 +
   1.858 +    return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
   1.859 +}
   1.860 +
   1.861 +static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
   1.862 +                          int *distortion, int fullpixel)
   1.863 +{
   1.864 +    vp8_build_inter4x4_predictors_mbuv(&x->e_mbd);
   1.865 +    vp8_subtract_mbuv(x->src_diff,
   1.866 +        x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
   1.867 +        &x->e_mbd.predictor[256], &x->e_mbd.predictor[320], 8);
   1.868 +
   1.869 +    vp8_transform_mbuv(x);
   1.870 +    vp8_quantize_mbuv(x);
   1.871 +
   1.872 +    *rate       = rd_cost_mbuv(x);
   1.873 +    *distortion = vp8_mbuverror(x) / 4;
   1.874 +
   1.875 +    return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
   1.876 +}
   1.877 +
   1.878 +static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
   1.879 +                                    int *rate_tokenonly, int *distortion)
   1.880 +{
   1.881 +    MB_PREDICTION_MODE mode;
   1.882 +    MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
   1.883 +    int best_rd = INT_MAX;
   1.884 +    int UNINITIALIZED_IS_SAFE(d), UNINITIALIZED_IS_SAFE(r);
   1.885 +    int rate_to;
   1.886 +    MACROBLOCKD *xd = &x->e_mbd;
   1.887 +
   1.888 +    for (mode = DC_PRED; mode <= TM_PRED; mode++)
   1.889 +    {
   1.890 +        int this_rate;
   1.891 +        int this_distortion;
   1.892 +        int this_rd;
   1.893 +
   1.894 +        xd->mode_info_context->mbmi.uv_mode = mode;
   1.895 +
   1.896 +        vp8_build_intra_predictors_mbuv_s(xd,
   1.897 +                                          xd->dst.u_buffer - xd->dst.uv_stride,
   1.898 +                                          xd->dst.v_buffer - xd->dst.uv_stride,
   1.899 +                                          xd->dst.u_buffer - 1,
   1.900 +                                          xd->dst.v_buffer - 1,
   1.901 +                                          xd->dst.uv_stride,
   1.902 +                                          &xd->predictor[256], &xd->predictor[320],
   1.903 +                                          8);
   1.904 +
   1.905 +
   1.906 +        vp8_subtract_mbuv(x->src_diff,
   1.907 +                      x->src.u_buffer, x->src.v_buffer, x->src.uv_stride,
   1.908 +                      &xd->predictor[256], &xd->predictor[320], 8);
   1.909 +        vp8_transform_mbuv(x);
   1.910 +        vp8_quantize_mbuv(x);
   1.911 +
   1.912 +        rate_to = rd_cost_mbuv(x);
   1.913 +        this_rate = rate_to + x->intra_uv_mode_cost[xd->frame_type][xd->mode_info_context->mbmi.uv_mode];
   1.914 +
   1.915 +        this_distortion = vp8_mbuverror(x) / 4;
   1.916 +
   1.917 +        this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
   1.918 +
   1.919 +        if (this_rd < best_rd)
   1.920 +        {
   1.921 +            best_rd = this_rd;
   1.922 +            d = this_distortion;
   1.923 +            r = this_rate;
   1.924 +            *rate_tokenonly = rate_to;
   1.925 +            mode_selected = mode;
   1.926 +        }
   1.927 +    }
   1.928 +
   1.929 +    *rate = r;
   1.930 +    *distortion = d;
   1.931 +
   1.932 +    xd->mode_info_context->mbmi.uv_mode = mode_selected;
   1.933 +}
   1.934 +
   1.935 +int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4])
   1.936 +{
   1.937 +    vp8_prob p [VP8_MVREFS-1];
   1.938 +    assert(NEARESTMV <= m  &&  m <= SPLITMV);
   1.939 +    vp8_mv_ref_probs(p, near_mv_ref_ct);
   1.940 +    return vp8_cost_token(vp8_mv_ref_tree, p,
   1.941 +                          vp8_mv_ref_encoding_array + (m - NEARESTMV));
   1.942 +}
   1.943 +
   1.944 +void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv)
   1.945 +{
   1.946 +    x->e_mbd.mode_info_context->mbmi.mode = mb;
   1.947 +    x->e_mbd.mode_info_context->mbmi.mv.as_int = mv->as_int;
   1.948 +}
   1.949 +
   1.950 +static int labels2mode(
   1.951 +    MACROBLOCK *x,
   1.952 +    int const *labelings, int which_label,
   1.953 +    B_PREDICTION_MODE this_mode,
   1.954 +    int_mv *this_mv, int_mv *best_ref_mv,
   1.955 +    int *mvcost[2]
   1.956 +)
   1.957 +{
   1.958 +    MACROBLOCKD *const xd = & x->e_mbd;
   1.959 +    MODE_INFO *const mic = xd->mode_info_context;
   1.960 +    const int mis = xd->mode_info_stride;
   1.961 +
   1.962 +    int cost = 0;
   1.963 +    int thismvcost = 0;
   1.964 +
   1.965 +    /* We have to be careful retrieving previously-encoded motion vectors.
   1.966 +       Ones from this macroblock have to be pulled from the BLOCKD array
   1.967 +       as they have not yet made it to the bmi array in our MB_MODE_INFO. */
   1.968 +
   1.969 +    int i = 0;
   1.970 +
   1.971 +    do
   1.972 +    {
   1.973 +        BLOCKD *const d = xd->block + i;
   1.974 +        const int row = i >> 2,  col = i & 3;
   1.975 +
   1.976 +        B_PREDICTION_MODE m;
   1.977 +
   1.978 +        if (labelings[i] != which_label)
   1.979 +            continue;
   1.980 +
   1.981 +        if (col  &&  labelings[i] == labelings[i-1])
   1.982 +            m = LEFT4X4;
   1.983 +        else if (row  &&  labelings[i] == labelings[i-4])
   1.984 +            m = ABOVE4X4;
   1.985 +        else
   1.986 +        {
   1.987 +            /* the only time we should do costing for new motion vector
   1.988 +             * or mode is when we are on a new label  (jbb May 08, 2007)
   1.989 +             */
   1.990 +            switch (m = this_mode)
   1.991 +            {
   1.992 +            case NEW4X4 :
   1.993 +                thismvcost  = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost, 102);
   1.994 +                break;
   1.995 +            case LEFT4X4:
   1.996 +                this_mv->as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i);
   1.997 +                break;
   1.998 +            case ABOVE4X4:
   1.999 +                this_mv->as_int = row ? d[-4].bmi.mv.as_int : above_block_mv(mic, i, mis);
  1.1000 +                break;
  1.1001 +            case ZERO4X4:
  1.1002 +                this_mv->as_int = 0;
  1.1003 +                break;
  1.1004 +            default:
  1.1005 +                break;
  1.1006 +            }
  1.1007 +
  1.1008 +            if (m == ABOVE4X4)  /* replace above with left if same */
  1.1009 +            {
  1.1010 +                int_mv left_mv;
  1.1011 +
  1.1012 +                left_mv.as_int = col ? d[-1].bmi.mv.as_int :
  1.1013 +                                        left_block_mv(mic, i);
  1.1014 +
  1.1015 +                if (left_mv.as_int == this_mv->as_int)
  1.1016 +                    m = LEFT4X4;
  1.1017 +            }
  1.1018 +
  1.1019 +            cost = x->inter_bmode_costs[ m];
  1.1020 +        }
  1.1021 +
  1.1022 +        d->bmi.mv.as_int = this_mv->as_int;
  1.1023 +
  1.1024 +        x->partition_info->bmi[i].mode = m;
  1.1025 +        x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
  1.1026 +
  1.1027 +    }
  1.1028 +    while (++i < 16);
  1.1029 +
  1.1030 +    cost += thismvcost ;
  1.1031 +    return cost;
  1.1032 +}
  1.1033 +
  1.1034 +static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
  1.1035 +                              int which_label, ENTROPY_CONTEXT *ta,
  1.1036 +                              ENTROPY_CONTEXT *tl)
  1.1037 +{
  1.1038 +    int cost = 0;
  1.1039 +    int b;
  1.1040 +    MACROBLOCKD *x = &mb->e_mbd;
  1.1041 +
  1.1042 +    for (b = 0; b < 16; b++)
  1.1043 +        if (labels[ b] == which_label)
  1.1044 +            cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_WITH_DC,
  1.1045 +                                ta + vp8_block2above[b],
  1.1046 +                                tl + vp8_block2left[b]);
  1.1047 +
  1.1048 +    return cost;
  1.1049 +
  1.1050 +}
  1.1051 +static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels, int which_label)
  1.1052 +{
  1.1053 +    int i;
  1.1054 +    unsigned int distortion = 0;
  1.1055 +    int pre_stride = x->e_mbd.pre.y_stride;
  1.1056 +    unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  1.1057 +
  1.1058 +
  1.1059 +    for (i = 0; i < 16; i++)
  1.1060 +    {
  1.1061 +        if (labels[i] == which_label)
  1.1062 +        {
  1.1063 +            BLOCKD *bd = &x->e_mbd.block[i];
  1.1064 +            BLOCK *be = &x->block[i];
  1.1065 +
  1.1066 +            vp8_build_inter_predictors_b(bd, 16, base_pre, pre_stride, x->e_mbd.subpixel_predict);
  1.1067 +            vp8_subtract_b(be, bd, 16);
  1.1068 +            x->short_fdct4x4(be->src_diff, be->coeff, 32);
  1.1069 +            x->quantize_b(be, bd);
  1.1070 +
  1.1071 +            distortion += vp8_block_error(be->coeff, bd->dqcoeff);
  1.1072 +        }
  1.1073 +    }
  1.1074 +
  1.1075 +    return distortion;
  1.1076 +}
  1.1077 +
  1.1078 +
  1.1079 +static const unsigned int segmentation_to_sseshift[4] = {3, 3, 2, 0};
  1.1080 +
  1.1081 +
  1.1082 +typedef struct
  1.1083 +{
  1.1084 +  int_mv *ref_mv;
  1.1085 +  int_mv mvp;
  1.1086 +
  1.1087 +  int segment_rd;
  1.1088 +  int segment_num;
  1.1089 +  int r;
  1.1090 +  int d;
  1.1091 +  int segment_yrate;
  1.1092 +  B_PREDICTION_MODE modes[16];
  1.1093 +  int_mv mvs[16];
  1.1094 +  unsigned char eobs[16];
  1.1095 +
  1.1096 +  int mvthresh;
  1.1097 +  int *mdcounts;
  1.1098 +
  1.1099 +  int_mv sv_mvp[4]; /* save 4 mvp from 8x8 */
  1.1100 +  int sv_istep[2];  /* save 2 initial step_param for 16x8/8x16 */
  1.1101 +
  1.1102 +} BEST_SEG_INFO;
  1.1103 +
  1.1104 +
  1.1105 +static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
  1.1106 +                             BEST_SEG_INFO *bsi, unsigned int segmentation)
  1.1107 +{
  1.1108 +    int i;
  1.1109 +    int const *labels;
  1.1110 +    int br = 0;
  1.1111 +    int bd = 0;
  1.1112 +    B_PREDICTION_MODE this_mode;
  1.1113 +
  1.1114 +
  1.1115 +    int label_count;
  1.1116 +    int this_segment_rd = 0;
  1.1117 +    int label_mv_thresh;
  1.1118 +    int rate = 0;
  1.1119 +    int sbr = 0;
  1.1120 +    int sbd = 0;
  1.1121 +    int segmentyrate = 0;
  1.1122 +
  1.1123 +    vp8_variance_fn_ptr_t *v_fn_ptr;
  1.1124 +
  1.1125 +    ENTROPY_CONTEXT_PLANES t_above, t_left;
  1.1126 +    ENTROPY_CONTEXT *ta;
  1.1127 +    ENTROPY_CONTEXT *tl;
  1.1128 +    ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
  1.1129 +    ENTROPY_CONTEXT *ta_b;
  1.1130 +    ENTROPY_CONTEXT *tl_b;
  1.1131 +
  1.1132 +    vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  1.1133 +    vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
  1.1134 +
  1.1135 +    ta = (ENTROPY_CONTEXT *)&t_above;
  1.1136 +    tl = (ENTROPY_CONTEXT *)&t_left;
  1.1137 +    ta_b = (ENTROPY_CONTEXT *)&t_above_b;
  1.1138 +    tl_b = (ENTROPY_CONTEXT *)&t_left_b;
  1.1139 +
  1.1140 +    br = 0;
  1.1141 +    bd = 0;
  1.1142 +
  1.1143 +    v_fn_ptr = &cpi->fn_ptr[segmentation];
  1.1144 +    labels = vp8_mbsplits[segmentation];
  1.1145 +    label_count = vp8_mbsplit_count[segmentation];
  1.1146 +
  1.1147 +    /* 64 makes this threshold really big effectively making it so that we
  1.1148 +     * very rarely check mvs on segments.   setting this to 1 would make mv
  1.1149 +     * thresh roughly equal to what it is for macroblocks
  1.1150 +     */
  1.1151 +    label_mv_thresh = 1 * bsi->mvthresh / label_count ;
  1.1152 +
  1.1153 +    /* Segmentation method overheads */
  1.1154 +    rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation);
  1.1155 +    rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts);
  1.1156 +    this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
  1.1157 +    br += rate;
  1.1158 +
  1.1159 +    for (i = 0; i < label_count; i++)
  1.1160 +    {
  1.1161 +        int_mv mode_mv[B_MODE_COUNT];
  1.1162 +        int best_label_rd = INT_MAX;
  1.1163 +        B_PREDICTION_MODE mode_selected = ZERO4X4;
  1.1164 +        int bestlabelyrate = 0;
  1.1165 +
  1.1166 +        /* search for the best motion vector on this segment */
  1.1167 +        for (this_mode = LEFT4X4; this_mode <= NEW4X4 ; this_mode ++)
  1.1168 +        {
  1.1169 +            int this_rd;
  1.1170 +            int distortion;
  1.1171 +            int labelyrate;
  1.1172 +            ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
  1.1173 +            ENTROPY_CONTEXT *ta_s;
  1.1174 +            ENTROPY_CONTEXT *tl_s;
  1.1175 +
  1.1176 +            vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
  1.1177 +            vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
  1.1178 +
  1.1179 +            ta_s = (ENTROPY_CONTEXT *)&t_above_s;
  1.1180 +            tl_s = (ENTROPY_CONTEXT *)&t_left_s;
  1.1181 +
  1.1182 +            if (this_mode == NEW4X4)
  1.1183 +            {
  1.1184 +                int sseshift;
  1.1185 +                int num00;
  1.1186 +                int step_param = 0;
  1.1187 +                int further_steps;
  1.1188 +                int n;
  1.1189 +                int thissme;
  1.1190 +                int bestsme = INT_MAX;
  1.1191 +                int_mv  temp_mv;
  1.1192 +                BLOCK *c;
  1.1193 +                BLOCKD *e;
  1.1194 +
  1.1195 +                /* Is the best so far sufficiently good that we cant justify
  1.1196 +                 * doing a new motion search.
  1.1197 +                 */
  1.1198 +                if (best_label_rd < label_mv_thresh)
  1.1199 +                    break;
  1.1200 +
  1.1201 +                if(cpi->compressor_speed)
  1.1202 +                {
  1.1203 +                    if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8)
  1.1204 +                    {
  1.1205 +                        bsi->mvp.as_int = bsi->sv_mvp[i].as_int;
  1.1206 +                        if (i==1 && segmentation == BLOCK_16X8)
  1.1207 +                          bsi->mvp.as_int = bsi->sv_mvp[2].as_int;
  1.1208 +
  1.1209 +                        step_param = bsi->sv_istep[i];
  1.1210 +                    }
  1.1211 +
  1.1212 +                    /* use previous block's result as next block's MV
  1.1213 +                     * predictor.
  1.1214 +                     */
  1.1215 +                    if (segmentation == BLOCK_4X4 && i>0)
  1.1216 +                    {
  1.1217 +                        bsi->mvp.as_int = x->e_mbd.block[i-1].bmi.mv.as_int;
  1.1218 +                        if (i==4 || i==8 || i==12)
  1.1219 +                            bsi->mvp.as_int = x->e_mbd.block[i-4].bmi.mv.as_int;
  1.1220 +                        step_param = 2;
  1.1221 +                    }
  1.1222 +                }
  1.1223 +
  1.1224 +                further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
  1.1225 +
  1.1226 +                {
  1.1227 +                    int sadpb = x->sadperbit4;
  1.1228 +                    int_mv mvp_full;
  1.1229 +
  1.1230 +                    mvp_full.as_mv.row = bsi->mvp.as_mv.row >>3;
  1.1231 +                    mvp_full.as_mv.col = bsi->mvp.as_mv.col >>3;
  1.1232 +
  1.1233 +                    /* find first label */
  1.1234 +                    n = vp8_mbsplit_offset[segmentation][i];
  1.1235 +
  1.1236 +                    c = &x->block[n];
  1.1237 +                    e = &x->e_mbd.block[n];
  1.1238 +
  1.1239 +                    {
  1.1240 +                        bestsme = cpi->diamond_search_sad(x, c, e, &mvp_full,
  1.1241 +                                                &mode_mv[NEW4X4], step_param,
  1.1242 +                                                sadpb, &num00, v_fn_ptr,
  1.1243 +                                                x->mvcost, bsi->ref_mv);
  1.1244 +
  1.1245 +                        n = num00;
  1.1246 +                        num00 = 0;
  1.1247 +
  1.1248 +                        while (n < further_steps)
  1.1249 +                        {
  1.1250 +                            n++;
  1.1251 +
  1.1252 +                            if (num00)
  1.1253 +                                num00--;
  1.1254 +                            else
  1.1255 +                            {
  1.1256 +                                thissme = cpi->diamond_search_sad(x, c, e,
  1.1257 +                                                    &mvp_full, &temp_mv,
  1.1258 +                                                    step_param + n, sadpb,
  1.1259 +                                                    &num00, v_fn_ptr,
  1.1260 +                                                    x->mvcost, bsi->ref_mv);
  1.1261 +
  1.1262 +                                if (thissme < bestsme)
  1.1263 +                                {
  1.1264 +                                    bestsme = thissme;
  1.1265 +                                    mode_mv[NEW4X4].as_int = temp_mv.as_int;
  1.1266 +                                }
  1.1267 +                            }
  1.1268 +                        }
  1.1269 +                    }
  1.1270 +
  1.1271 +                    sseshift = segmentation_to_sseshift[segmentation];
  1.1272 +
  1.1273 +                    /* Should we do a full search (best quality only) */
  1.1274 +                    if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000)
  1.1275 +                    {
  1.1276 +                        /* Check if mvp_full is within the range. */
  1.1277 +                        vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  1.1278 +
  1.1279 +                        thissme = cpi->full_search_sad(x, c, e, &mvp_full,
  1.1280 +                                                       sadpb, 16, v_fn_ptr,
  1.1281 +                                                       x->mvcost, bsi->ref_mv);
  1.1282 +
  1.1283 +                        if (thissme < bestsme)
  1.1284 +                        {
  1.1285 +                            bestsme = thissme;
  1.1286 +                            mode_mv[NEW4X4].as_int = e->bmi.mv.as_int;
  1.1287 +                        }
  1.1288 +                        else
  1.1289 +                        {
  1.1290 +                            /* The full search result is actually worse so
  1.1291 +                             * re-instate the previous best vector
  1.1292 +                             */
  1.1293 +                            e->bmi.mv.as_int = mode_mv[NEW4X4].as_int;
  1.1294 +                        }
  1.1295 +                    }
  1.1296 +                }
  1.1297 +
  1.1298 +                if (bestsme < INT_MAX)
  1.1299 +                {
  1.1300 +                    int disto;
  1.1301 +                    unsigned int sse;
  1.1302 +                    cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
  1.1303 +                        bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost,
  1.1304 +                        &disto, &sse);
  1.1305 +                }
  1.1306 +            } /* NEW4X4 */
  1.1307 +
  1.1308 +            rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
  1.1309 +                               bsi->ref_mv, x->mvcost);
  1.1310 +
  1.1311 +            /* Trap vectors that reach beyond the UMV borders */
  1.1312 +            if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
  1.1313 +                ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max))
  1.1314 +            {
  1.1315 +                continue;
  1.1316 +            }
  1.1317 +
  1.1318 +            distortion = vp8_encode_inter_mb_segment(x, labels, i) / 4;
  1.1319 +
  1.1320 +            labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
  1.1321 +            rate += labelyrate;
  1.1322 +
  1.1323 +            this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
  1.1324 +
  1.1325 +            if (this_rd < best_label_rd)
  1.1326 +            {
  1.1327 +                sbr = rate;
  1.1328 +                sbd = distortion;
  1.1329 +                bestlabelyrate = labelyrate;
  1.1330 +                mode_selected = this_mode;
  1.1331 +                best_label_rd = this_rd;
  1.1332 +
  1.1333 +                vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
  1.1334 +                vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
  1.1335 +
  1.1336 +            }
  1.1337 +        } /*for each 4x4 mode*/
  1.1338 +
  1.1339 +        vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
  1.1340 +        vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
  1.1341 +
  1.1342 +        labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
  1.1343 +                    bsi->ref_mv, x->mvcost);
  1.1344 +
  1.1345 +        br += sbr;
  1.1346 +        bd += sbd;
  1.1347 +        segmentyrate += bestlabelyrate;
  1.1348 +        this_segment_rd += best_label_rd;
  1.1349 +
  1.1350 +        if (this_segment_rd >= bsi->segment_rd)
  1.1351 +            break;
  1.1352 +
  1.1353 +    } /* for each label */
  1.1354 +
  1.1355 +    if (this_segment_rd < bsi->segment_rd)
  1.1356 +    {
  1.1357 +        bsi->r = br;
  1.1358 +        bsi->d = bd;
  1.1359 +        bsi->segment_yrate = segmentyrate;
  1.1360 +        bsi->segment_rd = this_segment_rd;
  1.1361 +        bsi->segment_num = segmentation;
  1.1362 +
  1.1363 +        /* store everything needed to come back to this!! */
  1.1364 +        for (i = 0; i < 16; i++)
  1.1365 +        {
  1.1366 +            bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
  1.1367 +            bsi->modes[i] = x->partition_info->bmi[i].mode;
  1.1368 +            bsi->eobs[i] = x->e_mbd.eobs[i];
  1.1369 +        }
  1.1370 +    }
  1.1371 +}
  1.1372 +
  1.1373 +static
  1.1374 +void vp8_cal_step_param(int sr, int *sp)
  1.1375 +{
  1.1376 +    int step = 0;
  1.1377 +
  1.1378 +    if (sr > MAX_FIRST_STEP) sr = MAX_FIRST_STEP;
  1.1379 +    else if (sr < 1) sr = 1;
  1.1380 +
  1.1381 +    while (sr>>=1)
  1.1382 +        step++;
  1.1383 +
  1.1384 +    *sp = MAX_MVSEARCH_STEPS - 1 - step;
  1.1385 +}
  1.1386 +
  1.1387 +static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x,
  1.1388 +                                           int_mv *best_ref_mv, int best_rd,
  1.1389 +                                           int *mdcounts, int *returntotrate,
  1.1390 +                                           int *returnyrate, int *returndistortion,
  1.1391 +                                           int mvthresh)
  1.1392 +{
  1.1393 +    int i;
  1.1394 +    BEST_SEG_INFO bsi;
  1.1395 +
  1.1396 +    vpx_memset(&bsi, 0, sizeof(bsi));
  1.1397 +
  1.1398 +    bsi.segment_rd = best_rd;
  1.1399 +    bsi.ref_mv = best_ref_mv;
  1.1400 +    bsi.mvp.as_int = best_ref_mv->as_int;
  1.1401 +    bsi.mvthresh = mvthresh;
  1.1402 +    bsi.mdcounts = mdcounts;
  1.1403 +
  1.1404 +    for(i = 0; i < 16; i++)
  1.1405 +    {
  1.1406 +        bsi.modes[i] = ZERO4X4;
  1.1407 +    }
  1.1408 +
  1.1409 +    if(cpi->compressor_speed == 0)
  1.1410 +    {
  1.1411 +        /* for now, we will keep the original segmentation order
  1.1412 +           when in best quality mode */
  1.1413 +        rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
  1.1414 +        rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
  1.1415 +        rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
  1.1416 +        rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
  1.1417 +    }
  1.1418 +    else
  1.1419 +    {
  1.1420 +        int sr;
  1.1421 +
  1.1422 +        rd_check_segment(cpi, x, &bsi, BLOCK_8X8);
  1.1423 +
  1.1424 +        if (bsi.segment_rd < best_rd)
  1.1425 +        {
  1.1426 +            int col_min = ((best_ref_mv->as_mv.col+7)>>3) - MAX_FULL_PEL_VAL;
  1.1427 +            int row_min = ((best_ref_mv->as_mv.row+7)>>3) - MAX_FULL_PEL_VAL;
  1.1428 +            int col_max = (best_ref_mv->as_mv.col>>3) + MAX_FULL_PEL_VAL;
  1.1429 +            int row_max = (best_ref_mv->as_mv.row>>3) + MAX_FULL_PEL_VAL;
  1.1430 +
  1.1431 +            int tmp_col_min = x->mv_col_min;
  1.1432 +            int tmp_col_max = x->mv_col_max;
  1.1433 +            int tmp_row_min = x->mv_row_min;
  1.1434 +            int tmp_row_max = x->mv_row_max;
  1.1435 +
  1.1436 +            /* Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. */
  1.1437 +            if (x->mv_col_min < col_min )
  1.1438 +                x->mv_col_min = col_min;
  1.1439 +            if (x->mv_col_max > col_max )
  1.1440 +                x->mv_col_max = col_max;
  1.1441 +            if (x->mv_row_min < row_min )
  1.1442 +                x->mv_row_min = row_min;
  1.1443 +            if (x->mv_row_max > row_max )
  1.1444 +                x->mv_row_max = row_max;
  1.1445 +
  1.1446 +            /* Get 8x8 result */
  1.1447 +            bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int;
  1.1448 +            bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int;
  1.1449 +            bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int;
  1.1450 +            bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int;
  1.1451 +
  1.1452 +            /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range according to the closeness of 2 MV. */
  1.1453 +            /* block 8X16 */
  1.1454 +            {
  1.1455 +                sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row))>>3, (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col))>>3);
  1.1456 +                vp8_cal_step_param(sr, &bsi.sv_istep[0]);
  1.1457 +
  1.1458 +                sr = MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row))>>3, (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col))>>3);
  1.1459 +                vp8_cal_step_param(sr, &bsi.sv_istep[1]);
  1.1460 +
  1.1461 +                rd_check_segment(cpi, x, &bsi, BLOCK_8X16);
  1.1462 +            }
  1.1463 +
  1.1464 +            /* block 16X8 */
  1.1465 +            {
  1.1466 +                sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row))>>3, (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col))>>3);
  1.1467 +                vp8_cal_step_param(sr, &bsi.sv_istep[0]);
  1.1468 +
  1.1469 +                sr = MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row))>>3, (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col))>>3);
  1.1470 +                vp8_cal_step_param(sr, &bsi.sv_istep[1]);
  1.1471 +
  1.1472 +                rd_check_segment(cpi, x, &bsi, BLOCK_16X8);
  1.1473 +            }
  1.1474 +
  1.1475 +            /* If 8x8 is better than 16x8/8x16, then do 4x4 search */
  1.1476 +            /* Not skip 4x4 if speed=0 (good quality) */
  1.1477 +            if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8)  /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
  1.1478 +            {
  1.1479 +                bsi.mvp.as_int = bsi.sv_mvp[0].as_int;
  1.1480 +                rd_check_segment(cpi, x, &bsi, BLOCK_4X4);
  1.1481 +            }
  1.1482 +
  1.1483 +            /* restore UMV window */
  1.1484 +            x->mv_col_min = tmp_col_min;
  1.1485 +            x->mv_col_max = tmp_col_max;
  1.1486 +            x->mv_row_min = tmp_row_min;
  1.1487 +            x->mv_row_max = tmp_row_max;
  1.1488 +        }
  1.1489 +    }
  1.1490 +
  1.1491 +    /* set it to the best */
  1.1492 +    for (i = 0; i < 16; i++)
  1.1493 +    {
  1.1494 +        BLOCKD *bd = &x->e_mbd.block[i];
  1.1495 +
  1.1496 +        bd->bmi.mv.as_int = bsi.mvs[i].as_int;
  1.1497 +        *bd->eob = bsi.eobs[i];
  1.1498 +    }
  1.1499 +
  1.1500 +    *returntotrate = bsi.r;
  1.1501 +    *returndistortion = bsi.d;
  1.1502 +    *returnyrate = bsi.segment_yrate;
  1.1503 +
  1.1504 +    /* save partitions */
  1.1505 +    x->e_mbd.mode_info_context->mbmi.partitioning = bsi.segment_num;
  1.1506 +    x->partition_info->count = vp8_mbsplit_count[bsi.segment_num];
  1.1507 +
  1.1508 +    for (i = 0; i < x->partition_info->count; i++)
  1.1509 +    {
  1.1510 +        int j;
  1.1511 +
  1.1512 +        j = vp8_mbsplit_offset[bsi.segment_num][i];
  1.1513 +
  1.1514 +        x->partition_info->bmi[i].mode = bsi.modes[j];
  1.1515 +        x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv;
  1.1516 +    }
  1.1517 +    /*
  1.1518 +     * used to set x->e_mbd.mode_info_context->mbmi.mv.as_int
  1.1519 +     */
  1.1520 +    x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int;
  1.1521 +
  1.1522 +    return bsi.segment_rd;
  1.1523 +}
  1.1524 +
  1.1525 +/* The improved MV prediction */
  1.1526 +void vp8_mv_pred
  1.1527 +(
  1.1528 +    VP8_COMP *cpi,
  1.1529 +    MACROBLOCKD *xd,
  1.1530 +    const MODE_INFO *here,
  1.1531 +    int_mv *mvp,
  1.1532 +    int refframe,
  1.1533 +    int *ref_frame_sign_bias,
  1.1534 +    int *sr,
  1.1535 +    int near_sadidx[]
  1.1536 +)
  1.1537 +{
  1.1538 +    const MODE_INFO *above = here - xd->mode_info_stride;
  1.1539 +    const MODE_INFO *left = here - 1;
  1.1540 +    const MODE_INFO *aboveleft = above - 1;
  1.1541 +    int_mv           near_mvs[8];
  1.1542 +    int              near_ref[8];
  1.1543 +    int_mv           mv;
  1.1544 +    int              vcnt=0;
  1.1545 +    int              find=0;
  1.1546 +    int              mb_offset;
  1.1547 +
  1.1548 +    int              mvx[8];
  1.1549 +    int              mvy[8];
  1.1550 +    int              i;
  1.1551 +
  1.1552 +    mv.as_int = 0;
  1.1553 +
  1.1554 +    if(here->mbmi.ref_frame != INTRA_FRAME)
  1.1555 +    {
  1.1556 +        near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = near_mvs[7].as_int = 0;
  1.1557 +        near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = near_ref[7] = 0;
  1.1558 +
  1.1559 +        /* read in 3 nearby block's MVs from current frame as prediction
  1.1560 +         * candidates.
  1.1561 +         */
  1.1562 +        if (above->mbmi.ref_frame != INTRA_FRAME)
  1.1563 +        {
  1.1564 +            near_mvs[vcnt].as_int = above->mbmi.mv.as_int;
  1.1565 +            mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
  1.1566 +            near_ref[vcnt] =  above->mbmi.ref_frame;
  1.1567 +        }
  1.1568 +        vcnt++;
  1.1569 +        if (left->mbmi.ref_frame != INTRA_FRAME)
  1.1570 +        {
  1.1571 +            near_mvs[vcnt].as_int = left->mbmi.mv.as_int;
  1.1572 +            mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
  1.1573 +            near_ref[vcnt] =  left->mbmi.ref_frame;
  1.1574 +        }
  1.1575 +        vcnt++;
  1.1576 +        if (aboveleft->mbmi.ref_frame != INTRA_FRAME)
  1.1577 +        {
  1.1578 +            near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int;
  1.1579 +            mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
  1.1580 +            near_ref[vcnt] =  aboveleft->mbmi.ref_frame;
  1.1581 +        }
  1.1582 +        vcnt++;
  1.1583 +
  1.1584 +        /* read in 5 nearby block's MVs from last frame. */
  1.1585 +        if(cpi->common.last_frame_type != KEY_FRAME)
  1.1586 +        {
  1.1587 +            mb_offset = (-xd->mb_to_top_edge/128 + 1) * (xd->mode_info_stride +1) + (-xd->mb_to_left_edge/128 +1) ;
  1.1588 +
  1.1589 +            /* current in last frame */
  1.1590 +            if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME)
  1.1591 +            {
  1.1592 +                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int;
  1.1593 +                mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
  1.1594 +                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset];
  1.1595 +            }
  1.1596 +            vcnt++;
  1.1597 +
  1.1598 +            /* above in last frame */
  1.1599 +            if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1] != INTRA_FRAME)
  1.1600 +            {
  1.1601 +                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride-1].as_int;
  1.1602 +                mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride-1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
  1.1603 +                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - xd->mode_info_stride-1];
  1.1604 +            }
  1.1605 +            vcnt++;
  1.1606 +
  1.1607 +            /* left in last frame */
  1.1608 +            if (cpi->lf_ref_frame[mb_offset-1] != INTRA_FRAME)
  1.1609 +            {
  1.1610 +                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset -1].as_int;
  1.1611 +                mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset -1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
  1.1612 +                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset - 1];
  1.1613 +            }
  1.1614 +            vcnt++;
  1.1615 +
  1.1616 +            /* right in last frame */
  1.1617 +            if (cpi->lf_ref_frame[mb_offset +1] != INTRA_FRAME)
  1.1618 +            {
  1.1619 +                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset +1].as_int;
  1.1620 +                mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
  1.1621 +                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset +1];
  1.1622 +            }
  1.1623 +            vcnt++;
  1.1624 +
  1.1625 +            /* below in last frame */
  1.1626 +            if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1] != INTRA_FRAME)
  1.1627 +            {
  1.1628 +                near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + xd->mode_info_stride +1].as_int;
  1.1629 +                mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride +1], refframe, &near_mvs[vcnt], ref_frame_sign_bias);
  1.1630 +                near_ref[vcnt] =  cpi->lf_ref_frame[mb_offset + xd->mode_info_stride +1];
  1.1631 +            }
  1.1632 +            vcnt++;
  1.1633 +        }
  1.1634 +
  1.1635 +        for(i=0; i< vcnt; i++)
  1.1636 +        {
  1.1637 +            if(near_ref[near_sadidx[i]] != INTRA_FRAME)
  1.1638 +            {
  1.1639 +                if(here->mbmi.ref_frame == near_ref[near_sadidx[i]])
  1.1640 +                {
  1.1641 +                    mv.as_int = near_mvs[near_sadidx[i]].as_int;
  1.1642 +                    find = 1;
  1.1643 +                    if (i < 3)
  1.1644 +                        *sr = 3;
  1.1645 +                    else
  1.1646 +                        *sr = 2;
  1.1647 +                    break;
  1.1648 +                }
  1.1649 +            }
  1.1650 +        }
  1.1651 +
  1.1652 +        if(!find)
  1.1653 +        {
  1.1654 +            for(i=0; i<vcnt; i++)
  1.1655 +            {
  1.1656 +                mvx[i] = near_mvs[i].as_mv.row;
  1.1657 +                mvy[i] = near_mvs[i].as_mv.col;
  1.1658 +            }
  1.1659 +
  1.1660 +            insertsortmv(mvx, vcnt);
  1.1661 +            insertsortmv(mvy, vcnt);
  1.1662 +            mv.as_mv.row = mvx[vcnt/2];
  1.1663 +            mv.as_mv.col = mvy[vcnt/2];
  1.1664 +
  1.1665 +            find = 1;
  1.1666 +            /* sr is set to 0 to allow calling function to decide the search
  1.1667 +             * range.
  1.1668 +             */
  1.1669 +            *sr = 0;
  1.1670 +        }
  1.1671 +    }
  1.1672 +
  1.1673 +    /* Set up return values */
  1.1674 +    mvp->as_int = mv.as_int;
  1.1675 +    vp8_clamp_mv2(mvp, xd);
  1.1676 +}
  1.1677 +
  1.1678 +void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[])
  1.1679 +{
              /* Compute 16x16 SADs between the source luma block (x->block[0])
               * and neighbouring 16x16 blocks: three in the current frame's
               * reconstruction buffer (xd->dst) and, when the last frame was not
               * a key frame, five in the last-frame buffer
               * (yv12_fb[lst_fb_idx]) at recon_yoffset. The SADs and the
               * caller-supplied near_sadidx[] are then handed to insertsortsad().
               */
  1.1680 +    /* near_sad indexes:
  1.1681 +     *   0-cf above, 1-cf left, 2-cf aboveleft,
  1.1682 +     *   3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below
  1.1683 +     */
  1.1684 +    int near_sad[8] = {0};
  1.1685 +    BLOCK *b = &x->block[0];
  1.1686 +    unsigned char *src_y_ptr = *(b->base_src);
  1.1687 +
  1.1688 +    /* calculate sad for current frame 3 nearby MBs. */
  1.1689 +    if( xd->mb_to_top_edge==0 && xd->mb_to_left_edge ==0)
  1.1690 +    {
  1.1691 +        near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX;
  1.1692 +    }else if(xd->mb_to_top_edge==0)
  1.1693 +    {   /* only has left MB for sad calculation. */
  1.1694 +        near_sad[0] = near_sad[2] = INT_MAX;
  1.1695 +        near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX);
  1.1696 +    }else if(xd->mb_to_left_edge ==0)
  1.1697 +    {   /* only has above MB for sad calculation. */
  1.1698 +        near_sad[1] = near_sad[2] = INT_MAX;
  1.1699 +        near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX);
  1.1700 +    }else
  1.1701 +    {
  1.1702 +        near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16,xd->dst.y_stride, UINT_MAX);
  1.1703 +        near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - 16,xd->dst.y_stride, UINT_MAX);
  1.1704 +        near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride *16 -16,xd->dst.y_stride, UINT_MAX);
  1.1705 +    }
  1.1706 +
  1.1707 +    if(cpi->common.last_frame_type != KEY_FRAME)
  1.1708 +    {
  1.1709 +        /* calculate sad for last frame 5 nearby MBs. */
  1.1710 +        unsigned char *pre_y_buffer = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset;
  1.1711 +        int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride;
  1.1712 +
                  /* A neighbour whose edge distance is 0 is marked INT_MAX here;
                   * the guarded calls below then leave it untouched instead of
                   * computing a SAD for it.
                   */
  1.1713 +        if(xd->mb_to_top_edge==0) near_sad[4] = INT_MAX;
  1.1714 +        if(xd->mb_to_left_edge ==0) near_sad[5] = INT_MAX;
  1.1715 +        if(xd->mb_to_right_edge ==0) near_sad[6] = INT_MAX;
  1.1716 +        if(xd->mb_to_bottom_edge==0) near_sad[7] = INT_MAX;
  1.1717 +
  1.1718 +        if(near_sad[4] != INT_MAX)
  1.1719 +            near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride *16, pre_y_stride, UINT_MAX);
  1.1720 +        if(near_sad[5] != INT_MAX)
  1.1721 +            near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride, UINT_MAX);
  1.1722 +        near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride, UINT_MAX);
  1.1723 +        if(near_sad[6] != INT_MAX)
  1.1724 +            near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride, UINT_MAX);
  1.1725 +        if(near_sad[7] != INT_MAX)
  1.1726 +            near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride *16, pre_y_stride, UINT_MAX);
  1.1727 +    }
  1.1728 +
              /* Rank all 8 candidates when the last-frame SADs were computed,
               * otherwise only the 3 current-frame ones.
               * NOTE(review): insertsortsad() is defined outside this view;
               * presumably it sorts near_sad[] ascending and permutes
               * near_sadidx[] to match - confirm.
               */
  1.1729 +    if(cpi->common.last_frame_type != KEY_FRAME)
  1.1730 +    {
  1.1731 +        insertsortsad(near_sad, near_sadidx, 8);
  1.1732 +    }else
  1.1733 +    {
  1.1734 +        insertsortsad(near_sad, near_sadidx, 3);
  1.1735 +    }
  1.1736 +}
  1.1737 +
  1.1738 +static void rd_update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv)
  1.1739 +{
              /* Update the x->MVcount histograms for the motion vectors of the
               * current macroblock: every NEW4X4 partition of a SPLITMV
               * macroblock, or the single macroblock vector for NEWMV. Other
               * modes leave the counts untouched.
               *
               * Index 0 counts row differences and index 1 column differences
               * relative to best_ref_mv; each difference is halved (>> 1) and
               * offset by mv_max to form the bin index.
               */
  1.1740 +    if (x->e_mbd.mode_info_context->mbmi.mode == SPLITMV)
  1.1741 +    {
  1.1742 +        int i;
  1.1743 +
  1.1744 +        for (i = 0; i < x->partition_info->count; i++)
  1.1745 +        {
  1.1746 +            if (x->partition_info->bmi[i].mode == NEW4X4)
  1.1747 +            {
  1.1748 +                x->MVcount[0][mv_max+((x->partition_info->bmi[i].mv.as_mv.row
  1.1749 +                                          - best_ref_mv->as_mv.row) >> 1)]++;
  1.1750 +                x->MVcount[1][mv_max+((x->partition_info->bmi[i].mv.as_mv.col
  1.1751 +                                          - best_ref_mv->as_mv.col) >> 1)]++;
  1.1752 +            }
  1.1753 +        }
  1.1754 +    }
  1.1755 +    else if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV)
  1.1756 +    {
  1.1757 +        x->MVcount[0][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row
  1.1758 +                                          - best_ref_mv->as_mv.row) >> 1)]++;
  1.1759 +        x->MVcount[1][mv_max+((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col
  1.1760 +                                          - best_ref_mv->as_mv.col) >> 1)]++;
  1.1761 +    }
  1.1762 +}
  1.1763 +
  1.1764 +static int evaluate_inter_mode_rd(int mdcounts[4],
  1.1765 +                                  RATE_DISTORTION* rd,
  1.1766 +                                  int* disable_skip,
  1.1767 +                                  VP8_COMP *cpi, MACROBLOCK *x)
  1.1768 +{
              /* Build the 16x16 inter predictor for the current mode and
               * accumulate its rate and distortion into *rd.
               *
               * Returns the RD cost of the macroblock when the encode-breakout
               * test decides it can be skipped (x->skip and *disable_skip are
               * set to 1 and rd is filled with the breakout estimate).
               * Otherwise adds the mode cost plus Y and UV rate/distortion to
               * rd and returns INT_MAX.
               */
  1.1769 +    MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
  1.1770 +    BLOCK *b = &x->block[0];
  1.1771 +    MACROBLOCKD *xd = &x->e_mbd;
  1.1772 +    int distortion;
  1.1773 +    vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16);
  1.1774 +
              /* An inactive macroblock in the active map is flagged as skip but
               * still goes through the normal costing below.
               */
  1.1775 +    if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
  1.1776 +        x->skip = 1;
  1.1777 +    }
  1.1778 +    else if (x->encode_breakout)
  1.1779 +    {
  1.1780 +        unsigned int sse;
  1.1781 +        unsigned int var;
                  /* Breakout threshold: the larger of dequant[1]^2 / 16 and the
                   * configured x->encode_breakout value.
                   */
  1.1782 +        unsigned int threshold = (xd->block[0].dequant[1]
  1.1783 +                    * xd->block[0].dequant[1] >>4);
  1.1784 +
  1.1785 +        if(threshold < x->encode_breakout)
  1.1786 +            threshold = x->encode_breakout;
  1.1787 +
  1.1788 +        var = vp8_variance16x16
  1.1789 +                (*(b->base_src), b->src_stride,
  1.1790 +                x->e_mbd.predictor, 16, &sse);
  1.1791 +
  1.1792 +        if (sse < threshold)
  1.1793 +        {
  1.1794 +             unsigned int q2dc = xd->block[24].dequant[0];
  1.1795 +            /* If there is no codeable 2nd order dc
  1.1796 +               or a very small uniform pixel change */
  1.1797 +            if ((sse - var < q2dc * q2dc >>4) ||
  1.1798 +                (sse /2 > var && sse-var < 64))
  1.1799 +            {
  1.1800 +                /* Check u and v to make sure skip is ok */
  1.1801 +                unsigned int sse2 = VP8_UVSSE(x);
  1.1802 +                if (sse2 * 2 < threshold)
  1.1803 +                {
  1.1804 +                    x->skip = 1;
  1.1805 +                    rd->distortion2 = sse + sse2;
  1.1806 +                    rd->rate2 = 500;
  1.1807 +
  1.1808 +                    /* for best_yrd calculation */
  1.1809 +                    rd->rate_uv = 0;
  1.1810 +                    rd->distortion_uv = sse2;
  1.1811 +
  1.1812 +                    *disable_skip = 1;
  1.1813 +                    return RDCOST(x->rdmult, x->rddiv, rd->rate2,
  1.1814 +                                  rd->distortion2);
  1.1815 +                }
  1.1816 +            }
  1.1817 +        }
  1.1818 +    }
  1.1819 +
  1.1820 +
  1.1821 +    /* Add in the Mv/mode cost */
  1.1822 +    rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
  1.1823 +
  1.1824 +    /* Y cost and distortion */
  1.1825 +    macro_block_yrd(x, &rd->rate_y, &distortion);
  1.1826 +    rd->rate2 += rd->rate_y;
  1.1827 +    rd->distortion2 += distortion;
  1.1828 +
  1.1829 +    /* UV cost and distortion */
  1.1830 +    rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv,
  1.1831 +                     cpi->common.full_pixel);
  1.1832 +    rd->rate2 += rd->rate_uv;
  1.1833 +    rd->distortion2 += rd->distortion_uv;
  1.1834 +    return INT_MAX;
  1.1835 +}
  1.1836 +
  1.1837 +static int calculate_final_rd_costs(int this_rd,
  1.1838 +                                    RATE_DISTORTION* rd,
  1.1839 +                                    int* other_cost,
  1.1840 +                                    int disable_skip,
  1.1841 +                                    int uv_intra_tteob,
  1.1842 +                                    int intra_rd_penalty,
  1.1843 +                                    VP8_COMP *cpi, MACROBLOCK *x)
  1.1844 +{
              /* Complete the rate accounting for the current mode and derive
               * its final RD cost.
               *
               * Adds the no-skip flag cost (when mb_no_coeff_skip is set) and
               * the reference frame signalling cost to rd->rate2; *other_cost
               * accumulates the skip-flag related part. When disable_skip is 0,
               * the rate is further adjusted if every counted eob is zero, the
               * RD cost is recomputed from rd, and intra_rd_penalty is added for
               * INTRA_FRAME. When disable_skip is non-zero the incoming this_rd
               * is returned unchanged.
               */
  1.1845 +    MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
  1.1846 +
  1.1847 +    /* Where skip is allowable add in the default per mb cost for the no
  1.1848 +     * skip case. where we then decide to skip we have to delete this and
  1.1849 +     * replace it with the cost of signalling a skip
  1.1850 +     */
  1.1851 +    if (cpi->common.mb_no_coeff_skip)
  1.1852 +    {
  1.1853 +        *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0);
  1.1854 +        rd->rate2 += *other_cost;
  1.1855 +    }
  1.1856 +
  1.1857 +    /* Estimate the reference frame signaling cost and add it
  1.1858 +     * to the rolling cost variable.
  1.1859 +     */
  1.1860 +    rd->rate2 +=
  1.1861 +        x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
  1.1862 +
  1.1863 +    if (!disable_skip)
  1.1864 +    {
  1.1865 +        /* Test for the condition where skip block will be activated
  1.1866 +         * because there are no non zero coefficients and make any
  1.1867 +         * necessary adjustment for rate
  1.1868 +         */
  1.1869 +        if (cpi->common.mb_no_coeff_skip)
  1.1870 +        {
  1.1871 +            int i;
  1.1872 +            int tteob;
  1.1873 +            int has_y2_block = (this_mode!=SPLITMV && this_mode!=B_PRED);
  1.1874 +
  1.1875 +            tteob = 0;
  1.1876 +            if(has_y2_block)
  1.1877 +                tteob += x->e_mbd.eobs[24];
  1.1878 +
                      /* With a Y2 block a luma block only counts when its eob
                       * exceeds 1. NOTE(review): presumably because its first
                       * coefficient is carried by the Y2 block - confirm.
                       */
  1.1879 +            for (i = 0; i < 16; i++)
  1.1880 +                tteob += (x->e_mbd.eobs[i] > has_y2_block);
  1.1881 +
                      /* Inter modes read the current chroma eobs; intra modes
                       * use the total passed in by the caller.
                       */
  1.1882 +            if (x->e_mbd.mode_info_context->mbmi.ref_frame)
  1.1883 +            {
  1.1884 +                for (i = 16; i < 24; i++)
  1.1885 +                    tteob += x->e_mbd.eobs[i];
  1.1886 +            }
  1.1887 +            else
  1.1888 +                tteob += uv_intra_tteob;
  1.1889 +
  1.1890 +            if (tteob == 0)
  1.1891 +            {
  1.1892 +                rd->rate2 -= (rd->rate_y + rd->rate_uv);
  1.1893 +                /* for best_yrd calculation */
  1.1894 +                rd->rate_uv = 0;
  1.1895 +
  1.1896 +                /* Back out no skip flag costing and add in skip flag costing */
  1.1897 +                if (cpi->prob_skip_false)
  1.1898 +                {
  1.1899 +                    int prob_skip_cost;
  1.1900 +
  1.1901 +                    prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1);
  1.1902 +                    prob_skip_cost -= vp8_cost_bit(cpi->prob_skip_false, 0);
  1.1903 +                    rd->rate2 += prob_skip_cost;
  1.1904 +                    *other_cost += prob_skip_cost;
  1.1905 +                }
  1.1906 +            }
  1.1907 +        }
  1.1908 +        /* Calculate the final RD estimate for this mode */
  1.1909 +        this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2);
  1.1910 +        if (this_rd < INT_MAX && x->e_mbd.mode_info_context->mbmi.ref_frame
  1.1911 +                                 == INTRA_FRAME)
  1.1912 +            this_rd += intra_rd_penalty;
  1.1913 +    }
  1.1914 +    return this_rd;
  1.1915 +}
  1.1916 +
  1.1917 +static void update_best_mode(BEST_MODE* best_mode, int this_rd,
  1.1918 +                             RATE_DISTORTION* rd, int other_cost, MACROBLOCK *x)
  1.1919 +{
              /* Record the macroblock's current mode as the best found so far:
               * store this_rd and a Y-only RD cost in best_mode, and snapshot the
               * current MB_MODE_INFO and PARTITION_INFO. For B_PRED and SPLITMV
               * the per-block bmi of all 16 luma blocks is saved as well.
               */
  1.1920 +    MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode;
  1.1921 +
              /* other_cost is a by-value copy; the reference frame cost is added
               * so that it is excluded from the Y-only rate below.
               */
  1.1922 +    other_cost +=
  1.1923 +    x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
  1.1924 +
  1.1925 +    /* Calculate the final y RD estimate for this mode */
  1.1926 +    best_mode->yrd = RDCOST(x->rdmult, x->rddiv, (rd->rate2-rd->rate_uv-other_cost),
  1.1927 +                      (rd->distortion2-rd->distortion_uv));
  1.1928 +
  1.1929 +    best_mode->rd = this_rd;
  1.1930 +    vpx_memcpy(&best_mode->mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
  1.1931 +    vpx_memcpy(&best_mode->partition, x->partition_info, sizeof(PARTITION_INFO));
  1.1932 +
  1.1933 +    if ((this_mode == B_PRED) || (this_mode == SPLITMV))
  1.1934 +    {
  1.1935 +        int i;
  1.1936 +        for (i = 0; i < 16; i++)
  1.1937 +        {
  1.1938 +            best_mode->bmodes[i] = x->e_mbd.block[i].bmi;
  1.1939 +        }
  1.1940 +    }
  1.1941 +}
  1.1942 +
  1.1943 +void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
  1.1944 +                            int recon_uvoffset, int *returnrate,
  1.1945 +                            int *returndistortion, int *returnintra)
  1.1946 +{
  1.1947 +    BLOCK *b = &x->block[0];
  1.1948 +    BLOCKD *d = &x->e_mbd.block[0];
  1.1949 +    MACROBLOCKD *xd = &x->e_mbd;
  1.1950 +    int_mv best_ref_mv_sb[2];
  1.1951 +    int_mv mode_mv_sb[2][MB_MODE_COUNT];
  1.1952 +    int_mv best_ref_mv;
  1.1953 +    int_mv *mode_mv;
  1.1954 +    MB_PREDICTION_MODE this_mode;
  1.1955 +    int num00;
  1.1956 +    int best_mode_index = 0;
  1.1957 +    BEST_MODE best_mode;
  1.1958 +
  1.1959 +    int i;
  1.1960 +    int mode_index;
  1.1961 +    int mdcounts[4];
  1.1962 +    int rate;
  1.1963 +    RATE_DISTORTION rd;
  1.1964 +    int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly;
  1.1965 +    int uv_intra_tteob = 0;
  1.1966 +    int uv_intra_done = 0;
  1.1967 +
  1.1968 +    MB_PREDICTION_MODE uv_intra_mode = 0;
  1.1969 +    int_mv mvp;
  1.1970 +    int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  1.1971 +    int saddone=0;
  1.1972 +    /* search range got from mv_pred(). It uses step_param levels. (0-7) */
  1.1973 +    int sr=0;
  1.1974 +
  1.1975 +    unsigned char *plane[4][3];
  1.1976 +    int ref_frame_map[4];
  1.1977 +    int sign_bias = 0;
  1.1978 +
  1.1979 +    int intra_rd_penalty =  10* vp8_dc_quant(cpi->common.base_qindex,
  1.1980 +                                             cpi->common.y1dc_delta_q);
  1.1981 +
  1.1982 +#if CONFIG_TEMPORAL_DENOISING
  1.1983 +    unsigned int zero_mv_sse = INT_MAX, best_sse = INT_MAX,
  1.1984 +            best_rd_sse = INT_MAX;
  1.1985 +#endif
  1.1986 +
  1.1987 +    mode_mv = mode_mv_sb[sign_bias];
  1.1988 +    best_ref_mv.as_int = 0;
  1.1989 +    best_mode.rd = INT_MAX;
  1.1990 +    best_mode.yrd = INT_MAX;
  1.1991 +    best_mode.intra_rd = INT_MAX;
  1.1992 +    vpx_memset(mode_mv_sb, 0, sizeof(mode_mv_sb));
  1.1993 +    vpx_memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode));
  1.1994 +    vpx_memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes));
  1.1995 +
  1.1996 +    /* Setup search priorities */
  1.1997 +    get_reference_search_order(cpi, ref_frame_map);
  1.1998 +
  1.1999 +    /* Check to see if there is at least 1 valid reference frame that we need
  1.2000 +     * to calculate near_mvs.
  1.2001 +     */
  1.2002 +    if (ref_frame_map[1] > 0)
  1.2003 +    {
  1.2004 +        sign_bias = vp8_find_near_mvs_bias(&x->e_mbd,
  1.2005 +                                           x->e_mbd.mode_info_context,
  1.2006 +                                           mode_mv_sb,
  1.2007 +                                           best_ref_mv_sb,
  1.2008 +                                           mdcounts,
  1.2009 +                                           ref_frame_map[1],
  1.2010 +                                           cpi->common.ref_frame_sign_bias);
  1.2011 +
  1.2012 +        mode_mv = mode_mv_sb[sign_bias];
  1.2013 +        best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
  1.2014 +    }
  1.2015 +
  1.2016 +    get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);
  1.2017 +
  1.2018 +    *returnintra = INT_MAX;
  1.2019 +    /* Count of the number of MBs tested so far this frame */
  1.2020 +    x->mbs_tested_so_far++;
  1.2021 +
  1.2022 +    x->skip = 0;
  1.2023 +
  1.2024 +    for (mode_index = 0; mode_index < MAX_MODES; mode_index++)
  1.2025 +    {
  1.2026 +        int this_rd = INT_MAX;
  1.2027 +        int disable_skip = 0;
  1.2028 +        int other_cost = 0;
  1.2029 +        int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]];
  1.2030 +
  1.2031 +        /* Test best rd so far against threshold for trying this mode. */
  1.2032 +        if (best_mode.rd <= x->rd_threshes[mode_index])
  1.2033 +            continue;
  1.2034 +
  1.2035 +        if (this_ref_frame < 0)
  1.2036 +            continue;
  1.2037 +
  1.2038 +        /* These variables hold the rolling total cost and distortion for
  1.2039 +         * this mode
  1.2040 +         */
  1.2041 +        rd.rate2 = 0;
  1.2042 +        rd.distortion2 = 0;
  1.2043 +
  1.2044 +        this_mode = vp8_mode_order[mode_index];
  1.2045 +
  1.2046 +        x->e_mbd.mode_info_context->mbmi.mode = this_mode;
  1.2047 +        x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
  1.2048 +
  1.2049 +        /* Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
  1.2050 +         * unless ARNR filtering is enabled in which case we want
  1.2051 +         * an unfiltered alternative
  1.2052 +         */
  1.2053 +        if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
  1.2054 +        {
  1.2055 +            if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
  1.2056 +                continue;
  1.2057 +        }
  1.2058 +
  1.2059 +        /* everything but intra */
  1.2060 +        if (x->e_mbd.mode_info_context->mbmi.ref_frame)
  1.2061 +        {
  1.2062 +            x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
  1.2063 +            x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
  1.2064 +            x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
  1.2065 +
  1.2066 +            if (sign_bias != cpi->common.ref_frame_sign_bias[this_ref_frame])
  1.2067 +            {
  1.2068 +                sign_bias = cpi->common.ref_frame_sign_bias[this_ref_frame];
  1.2069 +                mode_mv = mode_mv_sb[sign_bias];
  1.2070 +                best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int;
  1.2071 +            }
  1.2072 +        }
  1.2073 +
  1.2074 +        /* Check to see if the testing frequency for this mode is at its
  1.2075 +         * max If so then prevent it from being tested and increase the
  1.2076 +         * threshold for its testing
  1.2077 +         */
  1.2078 +        if (x->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1))
  1.2079 +        {
  1.2080 +            if (x->mbs_tested_so_far  <= cpi->mode_check_freq[mode_index] * x->mode_test_hit_counts[mode_index])
  1.2081 +            {
  1.2082 +                /* Increase the threshold for coding this mode to make it
  1.2083 +                 * less likely to be chosen
  1.2084 +                 */
  1.2085 +                x->rd_thresh_mult[mode_index] += 4;
  1.2086 +
  1.2087 +                if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
  1.2088 +                    x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
  1.2089 +
  1.2090 +                x->rd_threshes[mode_index] =
  1.2091 +                    (cpi->rd_baseline_thresh[mode_index] >> 7) *
  1.2092 +                    x->rd_thresh_mult[mode_index];
  1.2093 +
  1.2094 +                continue;
  1.2095 +            }
  1.2096 +        }
  1.2097 +
  1.2098 +        /* We have now reached the point where we are going to test the
  1.2099 +         * current mode so increment the counter for the number of times
  1.2100 +         * it has been tested
  1.2101 +         */
  1.2102 +        x->mode_test_hit_counts[mode_index] ++;
  1.2103 +
  1.2104 +        /* Experimental code. Special case for gf and arf zeromv modes.
  1.2105 +         * Increase zbin size to suppress noise
  1.2106 +         */
  1.2107 +        if (x->zbin_mode_boost_enabled)
  1.2108 +        {
  1.2109 +            if ( this_ref_frame == INTRA_FRAME )
  1.2110 +                x->zbin_mode_boost = 0;
  1.2111 +            else
  1.2112 +            {
  1.2113 +                if (vp8_mode_order[mode_index] == ZEROMV)
  1.2114 +                {
  1.2115 +                    if (this_ref_frame != LAST_FRAME)
  1.2116 +                        x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
  1.2117 +                    else
  1.2118 +                        x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
  1.2119 +                }
  1.2120 +                else if (vp8_mode_order[mode_index] == SPLITMV)
  1.2121 +                    x->zbin_mode_boost = 0;
  1.2122 +                else
  1.2123 +                    x->zbin_mode_boost = MV_ZBIN_BOOST;
  1.2124 +            }
  1.2125 +
  1.2126 +            vp8_update_zbin_extra(cpi, x);
  1.2127 +        }
  1.2128 +
  1.2129 +        if(!uv_intra_done && this_ref_frame == INTRA_FRAME)
  1.2130 +        {
  1.2131 +            rd_pick_intra_mbuv_mode(x, &uv_intra_rate,
  1.2132 +                                    &uv_intra_rate_tokenonly,
  1.2133 +                                    &uv_intra_distortion);
  1.2134 +            uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
  1.2135 +
  1.2136 +            /*
  1.2137 +             * Total of the eobs is used later to further adjust rate2. Since uv
  1.2138 +             * block's intra eobs will be overwritten when we check inter modes,
  1.2139 +             * we need to save uv_intra_tteob here.
  1.2140 +             */
  1.2141 +            for (i = 16; i < 24; i++)
  1.2142 +                uv_intra_tteob += x->e_mbd.eobs[i];
  1.2143 +
  1.2144 +            uv_intra_done = 1;
  1.2145 +        }
  1.2146 +
  1.2147 +        switch (this_mode)
  1.2148 +        {
  1.2149 +        case B_PRED:
  1.2150 +        {
  1.2151 +            int tmp_rd;
  1.2152 +
  1.2153 +            /* Note the rate value returned here includes the cost of
  1.2154 +             * coding the BPRED mode: x->mbmode_cost[x->e_mbd.frame_type][BPRED]
  1.2155 +             */
  1.2156 +            int distortion;
  1.2157 +            tmp_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion, best_mode.yrd);
  1.2158 +            rd.rate2 += rate;
  1.2159 +            rd.distortion2 += distortion;
  1.2160 +
  1.2161 +            if(tmp_rd < best_mode.yrd)
  1.2162 +            {
  1.2163 +                rd.rate2 += uv_intra_rate;
  1.2164 +                rd.rate_uv = uv_intra_rate_tokenonly;
  1.2165 +                rd.distortion2 += uv_intra_distortion;
  1.2166 +                rd.distortion_uv = uv_intra_distortion;
  1.2167 +            }
  1.2168 +            else
  1.2169 +            {
  1.2170 +                this_rd = INT_MAX;
  1.2171 +                disable_skip = 1;
  1.2172 +            }
  1.2173 +        }
  1.2174 +        break;
  1.2175 +
  1.2176 +        case SPLITMV:
  1.2177 +        {
  1.2178 +            int tmp_rd;
  1.2179 +            int this_rd_thresh;
  1.2180 +            int distortion;
  1.2181 +
  1.2182 +            this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1) ?
  1.2183 +                x->rd_threshes[THR_NEW1] : x->rd_threshes[THR_NEW3];
  1.2184 +            this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2) ?
  1.2185 +                x->rd_threshes[THR_NEW2] : this_rd_thresh;
  1.2186 +
  1.2187 +            tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
  1.2188 +                                                     best_mode.yrd, mdcounts,
  1.2189 +                                                     &rate, &rd.rate_y, &distortion, this_rd_thresh) ;
  1.2190 +
  1.2191 +            rd.rate2 += rate;
  1.2192 +            rd.distortion2 += distortion;
  1.2193 +
  1.2194 +            /* If even the 'Y' rd value of split is higher than best so far
  1.2195 +             * then dont bother looking at UV
  1.2196 +             */
  1.2197 +            if (tmp_rd < best_mode.yrd)
  1.2198 +            {
  1.2199 +                /* Now work out UV cost and add it in */
  1.2200 +                rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv, cpi->common.full_pixel);
  1.2201 +                rd.rate2 += rd.rate_uv;
  1.2202 +                rd.distortion2 += rd.distortion_uv;
  1.2203 +            }
  1.2204 +            else
  1.2205 +            {
  1.2206 +                this_rd = INT_MAX;
  1.2207 +                disable_skip = 1;
  1.2208 +            }
  1.2209 +        }
  1.2210 +        break;
  1.2211 +        case DC_PRED:
  1.2212 +        case V_PRED:
  1.2213 +        case H_PRED:
  1.2214 +        case TM_PRED:
  1.2215 +        {
  1.2216 +            int distortion;
  1.2217 +            x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
  1.2218 +
  1.2219 +            vp8_build_intra_predictors_mby_s(xd,
  1.2220 +                                             xd->dst.y_buffer - xd->dst.y_stride,
  1.2221 +                                             xd->dst.y_buffer - 1,
  1.2222 +                                             xd->dst.y_stride,
  1.2223 +                                             xd->predictor,
  1.2224 +                                             16);
  1.2225 +            macro_block_yrd(x, &rd.rate_y, &distortion) ;
  1.2226 +            rd.rate2 += rd.rate_y;
  1.2227 +            rd.distortion2 += distortion;
  1.2228 +            rd.rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
  1.2229 +            rd.rate2 += uv_intra_rate;
  1.2230 +            rd.rate_uv = uv_intra_rate_tokenonly;
  1.2231 +            rd.distortion2 += uv_intra_distortion;
  1.2232 +            rd.distortion_uv = uv_intra_distortion;
  1.2233 +        }
  1.2234 +        break;
  1.2235 +
  1.2236 +        case NEWMV:
  1.2237 +        {
  1.2238 +            int thissme;
  1.2239 +            int bestsme = INT_MAX;
  1.2240 +            int step_param = cpi->sf.first_step;
  1.2241 +            int further_steps;
  1.2242 +            int n;
  1.2243 +            int do_refine=1;   /* If last step (1-away) of n-step search doesn't pick the center point as the best match,
  1.2244 +                                  we will do a final 1-away diamond refining search  */
  1.2245 +
  1.2246 +            int sadpb = x->sadperbit16;
  1.2247 +            int_mv mvp_full;
  1.2248 +
  1.2249 +            int col_min = ((best_ref_mv.as_mv.col+7)>>3) - MAX_FULL_PEL_VAL;
  1.2250 +            int row_min = ((best_ref_mv.as_mv.row+7)>>3) - MAX_FULL_PEL_VAL;
  1.2251 +            int col_max = (best_ref_mv.as_mv.col>>3) + MAX_FULL_PEL_VAL;
  1.2252 +            int row_max = (best_ref_mv.as_mv.row>>3) + MAX_FULL_PEL_VAL;
  1.2253 +
  1.2254 +            int tmp_col_min = x->mv_col_min;
  1.2255 +            int tmp_col_max = x->mv_col_max;
  1.2256 +            int tmp_row_min = x->mv_row_min;
  1.2257 +            int tmp_row_max = x->mv_row_max;
  1.2258 +
  1.2259 +            if(!saddone)
  1.2260 +            {
  1.2261 +                vp8_cal_sad(cpi,xd,x, recon_yoffset ,&near_sadidx[0] );
  1.2262 +                saddone = 1;
  1.2263 +            }
  1.2264 +
  1.2265 +            vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp,
  1.2266 +                        x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
  1.2267 +
  1.2268 +            mvp_full.as_mv.col = mvp.as_mv.col>>3;
  1.2269 +            mvp_full.as_mv.row = mvp.as_mv.row>>3;
  1.2270 +
  1.2271 +            /* Get intersection of UMV window and valid MV window to
  1.2272 +             * reduce # of checks in diamond search.
  1.2273 +             */
  1.2274 +            if (x->mv_col_min < col_min )
  1.2275 +                x->mv_col_min = col_min;
  1.2276 +            if (x->mv_col_max > col_max )
  1.2277 +                x->mv_col_max = col_max;
  1.2278 +            if (x->mv_row_min < row_min )
  1.2279 +                x->mv_row_min = row_min;
  1.2280 +            if (x->mv_row_max > row_max )
  1.2281 +                x->mv_row_max = row_max;
  1.2282 +
  1.2283 +            /* adjust search range according to sr from mv prediction */
  1.2284 +            if(sr > step_param)
  1.2285 +                step_param = sr;
  1.2286 +
  1.2287 +            /* Initial step/diamond search */
  1.2288 +            {
  1.2289 +                bestsme = cpi->diamond_search_sad(x, b, d, &mvp_full, &d->bmi.mv,
  1.2290 +                                        step_param, sadpb, &num00,
  1.2291 +                                        &cpi->fn_ptr[BLOCK_16X16],
  1.2292 +                                        x->mvcost, &best_ref_mv);
  1.2293 +                mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
  1.2294 +
  1.2295 +                /* Further step/diamond searches as necessary */
  1.2296 +                n = 0;
  1.2297 +                further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
  1.2298 +
  1.2299 +                n = num00;
  1.2300 +                num00 = 0;
  1.2301 +
  1.2302 +                /* If there won't be more n-step search, check to see if refining search is needed. */
  1.2303 +                if (n > further_steps)
  1.2304 +                    do_refine = 0;
  1.2305 +
  1.2306 +                while (n < further_steps)
  1.2307 +                {
  1.2308 +                    n++;
  1.2309 +
  1.2310 +                    if (num00)
  1.2311 +                        num00--;
  1.2312 +                    else
  1.2313 +                    {
  1.2314 +                        thissme = cpi->diamond_search_sad(x, b, d, &mvp_full,
  1.2315 +                                    &d->bmi.mv, step_param + n, sadpb, &num00,
  1.2316 +                                    &cpi->fn_ptr[BLOCK_16X16], x->mvcost,
  1.2317 +                                    &best_ref_mv);
  1.2318 +
  1.2319 +                        /* check to see if refining search is needed. */
  1.2320 +                        if (num00 > (further_steps-n))
  1.2321 +                            do_refine = 0;
  1.2322 +
  1.2323 +                        if (thissme < bestsme)
  1.2324 +                        {
  1.2325 +                            bestsme = thissme;
  1.2326 +                            mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
  1.2327 +                        }
  1.2328 +                        else
  1.2329 +                        {
  1.2330 +                            d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
  1.2331 +                        }
  1.2332 +                    }
  1.2333 +                }
  1.2334 +            }
  1.2335 +
  1.2336 +            /* final 1-away diamond refining search */
  1.2337 +            if (do_refine == 1)
  1.2338 +            {
  1.2339 +                int search_range;
  1.2340 +
  1.2341 +                search_range = 8;
  1.2342 +
  1.2343 +                thissme = cpi->refining_search_sad(x, b, d, &d->bmi.mv, sadpb,
  1.2344 +                                       search_range, &cpi->fn_ptr[BLOCK_16X16],
  1.2345 +                                       x->mvcost, &best_ref_mv);
  1.2346 +
  1.2347 +                if (thissme < bestsme)
  1.2348 +                {
  1.2349 +                    bestsme = thissme;
  1.2350 +                    mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
  1.2351 +                }
  1.2352 +                else
  1.2353 +                {
  1.2354 +                    d->bmi.mv.as_int = mode_mv[NEWMV].as_int;
  1.2355 +                }
  1.2356 +            }
  1.2357 +
  1.2358 +            x->mv_col_min = tmp_col_min;
  1.2359 +            x->mv_col_max = tmp_col_max;
  1.2360 +            x->mv_row_min = tmp_row_min;
  1.2361 +            x->mv_row_max = tmp_row_max;
  1.2362 +
  1.2363 +            if (bestsme < INT_MAX)
  1.2364 +            {
  1.2365 +                int dis; /* TODO: use dis in distortion calculation later. */
  1.2366 +                unsigned int sse;
  1.2367 +                cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv, &best_ref_mv,
  1.2368 +                                             x->errorperbit,
  1.2369 +                                             &cpi->fn_ptr[BLOCK_16X16],
  1.2370 +                                             x->mvcost, &dis, &sse);
  1.2371 +            }
  1.2372 +
  1.2373 +            mode_mv[NEWMV].as_int = d->bmi.mv.as_int;
  1.2374 +
  1.2375 +            /* Add the new motion vector cost to our rolling cost variable */
  1.2376 +            rd.rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
  1.2377 +        }
  1.2378 +
  1.2379 +        case NEARESTMV:
  1.2380 +        case NEARMV:
  1.2381 +            /* Clip "next_nearest" so that it does not extend too far out
  1.2382 +             * of image
  1.2383 +             */
  1.2384 +            vp8_clamp_mv2(&mode_mv[this_mode], xd);
  1.2385 +
  1.2386 +            /* Do not bother proceeding if the vector (from newmv, nearest
  1.2387 +             * or near) is 0,0 as this should then be coded using the zeromv
  1.2388 +             * mode.
  1.2389 +             */
  1.2390 +            if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) && (mode_mv[this_mode].as_int == 0))
  1.2391 +                continue;
  1.2392 +
  1.2393 +        case ZEROMV:
  1.2394 +
  1.2395 +            /* Trap vectors that reach beyond the UMV borders
  1.2396 +             * Note that ALL New MV, Nearest MV Near MV and Zero MV code
  1.2397 +             * drops through to this point because of the lack of break
  1.2398 +             * statements in the previous two cases.
  1.2399 +             */
  1.2400 +            if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
  1.2401 +                ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max))
  1.2402 +                continue;
  1.2403 +
  1.2404 +            vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
  1.2405 +            this_rd = evaluate_inter_mode_rd(mdcounts, &rd,
  1.2406 +                                             &disable_skip, cpi, x);
  1.2407 +            break;
  1.2408 +
  1.2409 +        default:
  1.2410 +            break;
  1.2411 +        }
  1.2412 +
  1.2413 +        this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost,
  1.2414 +                                           disable_skip, uv_intra_tteob,
  1.2415 +                                           intra_rd_penalty, cpi, x);
  1.2416 +
  1.2417 +        /* Keep record of best intra distortion */
  1.2418 +        if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
  1.2419 +            (this_rd < best_mode.intra_rd) )
  1.2420 +        {
  1.2421 +          best_mode.intra_rd = this_rd;
  1.2422 +            *returnintra = rd.distortion2 ;
  1.2423 +        }
  1.2424 +#if CONFIG_TEMPORAL_DENOISING
  1.2425 +        if (cpi->oxcf.noise_sensitivity)
  1.2426 +        {
  1.2427 +            unsigned int sse;
  1.2428 +            vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&sse,
  1.2429 +                                   mode_mv[this_mode]);
  1.2430 +
  1.2431 +            if (sse < best_rd_sse)
  1.2432 +                best_rd_sse = sse;
  1.2433 +
  1.2434 +            /* Store for later use by denoiser. */
  1.2435 +            if (this_mode == ZEROMV && sse < zero_mv_sse )
  1.2436 +            {
  1.2437 +                zero_mv_sse = sse;
  1.2438 +                x->best_zeromv_reference_frame =
  1.2439 +                        x->e_mbd.mode_info_context->mbmi.ref_frame;
  1.2440 +            }
  1.2441 +
  1.2442 +            /* Store the best NEWMV in x for later use in the denoiser. */
  1.2443 +            if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV &&
  1.2444 +                    sse < best_sse)
  1.2445 +            {
  1.2446 +                best_sse = sse;
  1.2447 +                vp8_get_inter_mbpred_error(x,&cpi->fn_ptr[BLOCK_16X16],&best_sse,
  1.2448 +                                       mode_mv[this_mode]);
  1.2449 +                x->best_sse_inter_mode = NEWMV;
  1.2450 +                x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv;
  1.2451 +                x->need_to_clamp_best_mvs =
  1.2452 +                    x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs;
  1.2453 +                x->best_reference_frame =
  1.2454 +                    x->e_mbd.mode_info_context->mbmi.ref_frame;
  1.2455 +            }
  1.2456 +        }
  1.2457 +#endif
  1.2458 +
  1.2459 +        /* Did this mode help, i.e. is it the new best mode? */
  1.2460 +        if (this_rd < best_mode.rd || x->skip)
  1.2461 +        {
  1.2462 +            /* Note index of best mode so far */
  1.2463 +            best_mode_index = mode_index;
  1.2464 +            *returnrate = rd.rate2;
  1.2465 +            *returndistortion = rd.distortion2;
  1.2466 +            if (this_mode <= B_PRED)
  1.2467 +            {
  1.2468 +                x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode;
  1.2469 +                /* required for left and above block mv */
  1.2470 +                x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
  1.2471 +            }
  1.2472 +            update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
  1.2473 +
  1.2474 +
  1.2475 +            /* Testing this mode gave rise to an improvement in best error
  1.2476 +             * score. Lower threshold a bit for next time
  1.2477 +             */
  1.2478 +            x->rd_thresh_mult[mode_index] =
  1.2479 +                (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
  1.2480 +                    x->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
  1.2481 +        }
  1.2482 +
  1.2483 +        /* If the mode did not help improve the best error case then raise
  1.2484 +         * the threshold for testing that mode next time around.
  1.2485 +         */
  1.2486 +        else
  1.2487 +        {
  1.2488 +            x->rd_thresh_mult[mode_index] += 4;
  1.2489 +
  1.2490 +            if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
  1.2491 +                x->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
  1.2492 +        }
  1.2493 +        x->rd_threshes[mode_index] =
  1.2494 +            (cpi->rd_baseline_thresh[mode_index] >> 7) *
  1.2495 +                x->rd_thresh_mult[mode_index];
  1.2496 +
  1.2497 +        if (x->skip)
  1.2498 +            break;
  1.2499 +
  1.2500 +    }
  1.2501 +
  1.2502 +    /* Reduce the activation RD thresholds for the best choice mode */
  1.2503 +    if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2)))
  1.2504 +    {
  1.2505 +        int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 2);
  1.2506 +
  1.2507 +        x->rd_thresh_mult[best_mode_index] =
  1.2508 +            (x->rd_thresh_mult[best_mode_index] >=
  1.2509 +                (MIN_THRESHMULT + best_adjustment)) ?
  1.2510 +                    x->rd_thresh_mult[best_mode_index] - best_adjustment :
  1.2511 +                    MIN_THRESHMULT;
  1.2512 +        x->rd_threshes[best_mode_index] =
  1.2513 +            (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
  1.2514 +                x->rd_thresh_mult[best_mode_index];
  1.2515 +    }
  1.2516 +
  1.2517 +#if CONFIG_TEMPORAL_DENOISING
  1.2518 +    if (cpi->oxcf.noise_sensitivity)
  1.2519 +    {
  1.2520 +        if (x->best_sse_inter_mode == DC_PRED)
  1.2521 +        {
  1.2522 +            /* No best MV found. */
  1.2523 +            x->best_sse_inter_mode = best_mode.mbmode.mode;
  1.2524 +            x->best_sse_mv = best_mode.mbmode.mv;
  1.2525 +            x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs;
  1.2526 +            x->best_reference_frame = best_mode.mbmode.ref_frame;
  1.2527 +            best_sse = best_rd_sse;
  1.2528 +        }
  1.2529 +        vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse,
  1.2530 +                                recon_yoffset, recon_uvoffset);
  1.2531 +
  1.2532 +
  1.2533 +        /* Reevaluate ZEROMV after denoising. */
  1.2534 +        if (best_mode.mbmode.ref_frame == INTRA_FRAME &&
  1.2535 +            x->best_zeromv_reference_frame != INTRA_FRAME)
  1.2536 +        {
  1.2537 +            int this_rd = INT_MAX;
  1.2538 +            int disable_skip = 0;
  1.2539 +            int other_cost = 0;
  1.2540 +            int this_ref_frame = x->best_zeromv_reference_frame;
  1.2541 +            rd.rate2 = x->ref_frame_cost[this_ref_frame] +
  1.2542 +                    vp8_cost_mv_ref(ZEROMV, mdcounts);
  1.2543 +            rd.distortion2 = 0;
  1.2544 +
  1.2545 +            /* set up the proper prediction buffers for the frame */
  1.2546 +            x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame;
  1.2547 +            x->e_mbd.pre.y_buffer = plane[this_ref_frame][0];
  1.2548 +            x->e_mbd.pre.u_buffer = plane[this_ref_frame][1];
  1.2549 +            x->e_mbd.pre.v_buffer = plane[this_ref_frame][2];
  1.2550 +
  1.2551 +            x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
  1.2552 +            x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
  1.2553 +            x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
  1.2554 +
  1.2555 +            this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x);
  1.2556 +            this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost,
  1.2557 +                                               disable_skip, uv_intra_tteob,
  1.2558 +                                               intra_rd_penalty, cpi, x);
  1.2559 +            if (this_rd < best_mode.rd || x->skip)
  1.2560 +            {
  1.2561 +                /* Note index of best mode so far */
  1.2562 +                best_mode_index = mode_index;
  1.2563 +                *returnrate = rd.rate2;
  1.2564 +                *returndistortion = rd.distortion2;
  1.2565 +                update_best_mode(&best_mode, this_rd, &rd, other_cost, x);
  1.2566 +            }
  1.2567 +        }
  1.2568 +
  1.2569 +    }
  1.2570 +#endif
  1.2571 +
  1.2572 +    if (cpi->is_src_frame_alt_ref &&
  1.2573 +        (best_mode.mbmode.mode != ZEROMV || best_mode.mbmode.ref_frame != ALTREF_FRAME))
  1.2574 +    {
  1.2575 +        x->e_mbd.mode_info_context->mbmi.mode = ZEROMV;
  1.2576 +        x->e_mbd.mode_info_context->mbmi.ref_frame = ALTREF_FRAME;
  1.2577 +        x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
  1.2578 +        x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
  1.2579 +        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff =
  1.2580 +                                        (cpi->common.mb_no_coeff_skip);
  1.2581 +        x->e_mbd.mode_info_context->mbmi.partitioning = 0;
  1.2582 +        return;
  1.2583 +    }
  1.2584 +
  1.2585 +
  1.2586 +    /* macroblock modes */
  1.2587 +    vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO));
  1.2588 +
  1.2589 +    if (best_mode.mbmode.mode == B_PRED)
  1.2590 +    {
  1.2591 +        for (i = 0; i < 16; i++)
  1.2592 +            xd->mode_info_context->bmi[i].as_mode = best_mode.bmodes[i].as_mode;
  1.2593 +    }
  1.2594 +
  1.2595 +    if (best_mode.mbmode.mode == SPLITMV)
  1.2596 +    {
  1.2597 +        for (i = 0; i < 16; i++)
  1.2598 +            xd->mode_info_context->bmi[i].mv.as_int = best_mode.bmodes[i].mv.as_int;
  1.2599 +
  1.2600 +        vpx_memcpy(x->partition_info, &best_mode.partition, sizeof(PARTITION_INFO));
  1.2601 +
  1.2602 +        x->e_mbd.mode_info_context->mbmi.mv.as_int =
  1.2603 +                                      x->partition_info->bmi[15].mv.as_int;
  1.2604 +    }
  1.2605 +
  1.2606 +    if (sign_bias
  1.2607 +        != cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame])
  1.2608 +        best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int;
  1.2609 +
  1.2610 +    rd_update_mvcount(x, &best_ref_mv);
  1.2611 +}
  1.2612 +
  1.2613 +/*
  1.2614 + * Select the intra coding choice for macroblock x and report its rate
  1.2615 + * through rate_.
  1.2616 + *
  1.2617 + * ref_frame is set to INTRA_FRAME, then rd_pick_intra_mbuv_mode,
  1.2618 + * rd_pick_intra16x16mby_mode and rd_pick_intra4x4mby_modes are run in that
  1.2619 + * order. If the 4x4 search returns a lower error than the 16x16 search,
  1.2620 + * mbmi.mode becomes B_PRED and the 4x4 rate is charged; otherwise mbmi.mode
  1.2621 + * is not written here (presumably it keeps what the 16x16 picker chose) and
  1.2622 + * the 16x16 rate is charged. The UV rate is added in both cases.
  1.2623 + *
  1.2624 + * The distortion and token-only outputs of the pickers are collected but
  1.2625 + * not used by this function.
  1.2626 + */
  1.2627 +void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate_)
  1.2628 +{
  1.2629 +    int uv_rate, uv_rate_tokenonly = 0, uv_dist;
  1.2630 +    int y16_rate = 0, y16_rate_tokenonly = 0, y16_dist;
  1.2631 +    int y4_rate, y4_rate_tokenonly = 0, y4_dist;
  1.2632 +    int y16_error, y4_error;
  1.2633 +
  1.2634 +    x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
  1.2635 +
  1.2636 +    rd_pick_intra_mbuv_mode(x, &uv_rate, &uv_rate_tokenonly, &uv_dist);
  1.2637 +
  1.2638 +    y16_error = rd_pick_intra16x16mby_mode(x, &y16_rate,
  1.2639 +                                           &y16_rate_tokenonly, &y16_dist);
  1.2640 +
  1.2641 +    /* The 16x16 error is passed on to the 4x4 search. */
  1.2642 +    y4_error = rd_pick_intra4x4mby_modes(x, &y4_rate, &y4_rate_tokenonly,
  1.2643 +                                         &y4_dist, y16_error);
  1.2644 +
  1.2645 +    if (y4_error >= y16_error)
  1.2646 +    {
  1.2647 +        /* 4x4 prediction did not beat 16x16: charge the 16x16 rate. */
  1.2648 +        *rate_ = uv_rate + y16_rate;
  1.2649 +        return;
  1.2650 +    }
  1.2651 +
  1.2652 +    x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
  1.2653 +    *rate_ = uv_rate + y4_rate;
  1.2654 +}

mercurial