media/libvpx/vp9/encoder/vp9_rdopt.c

author: Michael Schloh von Bennewitz <michael@schloh.com>
date:   Wed, 31 Dec 2014 06:09:35 +0100
changeset: 0:6474c204b198

Cloned from upstream origin tor-browser at tag tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>

#include "vp9/common/vp9_pragmas.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_common.h"
#define INVALID_MV 0x80008000

/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

#define LAST_FRAME_MODE_MASK    0xFFEDCD60
#define GOLDEN_FRAME_MODE_MASK  0xFFDA3BB0
#define ALT_REF_MODE_MASK       0xFFC648D0

#define MIN_EARLY_TERM_INDEX    3
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},

  {DC_PRED,   INTRA_FRAME,  NONE},

  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},

  {NEARMV,    LAST_FRAME,   NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},

  {TM_PRED,   INTRA_FRAME,  NONE},

  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},
  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},

  {ZEROMV,    LAST_FRAME,   NONE},
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {ZEROMV,    ALTREF_FRAME, NONE},
  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},

  {H_PRED,    INTRA_FRAME,  NONE},
  {V_PRED,    INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D207_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
};
const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  {LAST_FRAME,   NONE},
  {GOLDEN_FRAME, NONE},
  {ALTREF_FRAME, NONE},
  {LAST_FRAME,   ALTREF_FRAME},
  {GOLDEN_FRAME, ALTREF_FRAME},
  {INTRA_FRAME,  NONE},
};
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
static int rd_thresh_block_size_factor[BLOCK_SIZES] =
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};
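
/* Editorial note: the stored factors carry two fractional bits, so the
 * effective multiplier is factor / 4: BLOCK_4X4 stores 2 (x0.5) and
 * BLOCK_64X64 stores 32 (x8). set_block_thresholds() below applies this
 * as thresh_mult * q * rd_thresh_block_size_factor[bsize] / 4. */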
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC      1
#define RD_THRESH_POW      1.25
#define RD_MULT_EPB_RATIO  64

#define MV_COST_WEIGHT      108
#define MV_COST_WEIGHT_SUB  120
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
  int i, j, k, l;
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; t++)
    for (i = 0; i < BLOCK_TYPES; i++)
      for (j = 0; j < REF_TYPES; j++)
        for (k = 0; k < COEF_BANDS; k++)
          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
                            vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                 vp9_coef_tree);
            assert(c[t][i][j][k][0][l][DCT_EOB_TOKEN] ==
                   c[t][i][j][k][1][l][DCT_EOB_TOKEN]);
          }
}
static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};

// 3* dc_qlookup[Q]*dc_qlookup[Q];

/* values are now correlated to quantizer */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];
void vp9_init_me_luts() {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
  }
}
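
/* Worked example (editorial, not upstream code): for a qindex i where
 * vp9_convert_qindex_to_q(i) == 32.0, the formulas above give
 * sad_per_bit16lut[i] = (int)(0.0418 * 32.0 + 2.4107) = 3 and
 * sad_per_bit4lut[i]  = (int)(0.063  * 32.0 + 2.742)  = 4. */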
int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  // TODO(debargha): Adjust the function below
  int rdmult = 88 * q * q / 25;
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      rdmult += (rdmult * rd_iifactor[31]) >> 4;
    else
      rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  return rdmult;
}
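
/* Editorial sketch: the value computed above is the Lagrange multiplier
 * that the RDCOST() macro folds together with rate and distortion into a
 * single cost J = D + lambda * R. Assuming the macro's form in this tree
 * (rate in 1/256-bit units, the rounded product shifted down by 8 bits,
 * distortion shifted up by RDDIV_BITS), a self-contained equivalent would
 * be the following; it is illustrative only and not called by the encoder. */
static INLINE int64_t example_rdcost(int rdmult, int rddiv,
                                     int rate, int64_t dist) {
  /* Mirrors RDCOST(rdmult, rddiv, rate, dist) as used throughout this file. */
  return ((128 + (int64_t)rate * rdmult) >> 8) + (dist << rddiv);
}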
static int compute_rd_thresh_factor(int qindex) {
  int q;
  // TODO(debargha): Adjust the function below
  q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
  if (q < 8)
    q = 8;
  return q;
}

void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}
static void set_block_thresholds(VP9_COMP *cpi) {
  int i, bsize, segment_id;
  VP9_COMMON *cm = &cpi->common;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    int q;
    int segment_qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
    segment_qindex = clamp(segment_qindex + cm->y_dc_delta_q, 0, MAXQ);
    q = compute_rd_thresh_factor(segment_qindex);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Thresholds here seem unnecessarily harsh but are fine given the
      // actual range of values used for cpi->sf.thresh_mult[].
      int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);

      for (i = 0; i < MAX_MODES; ++i) {
        if (cpi->sf.thresh_mult[i] < thresh_max) {
          cpi->rd_threshes[segment_id][bsize][i] =
              cpi->sf.thresh_mult[i] * q *
              rd_thresh_block_size_factor[bsize] / 4;
        } else {
          cpi->rd_threshes[segment_id][bsize][i] = INT_MAX;
        }
      }

      for (i = 0; i < MAX_REFS; ++i) {
        if (cpi->sf.thresh_mult_sub8x8[i] < thresh_max) {
          cpi->rd_thresh_sub8x8[segment_id][bsize][i] =
              cpi->sf.thresh_mult_sub8x8[i] * q *
              rd_thresh_block_size_factor[bsize] / 4;
        } else {
          cpi->rd_thresh_sub8x8[segment_id][bsize][i] = INT_MAX;
        }
      }
    }
  }
}
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *cm = &cpi->common;
  int qindex, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(cm->base_qindex + cm->y_dc_delta_q, 0, MAXQ);

  cpi->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
  cpi->RDMULT = vp9_compute_rd_mult(cpi, qindex);

  cpi->mb.errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  cpi->mb.select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                              cm->frame_type != KEY_FRAME) ?
                             0 : 1;

  set_block_thresholds(cpi);

  fill_token_costs(cpi->mb.token_costs, cm->fc.coef_probs);

  for (i = 0; i < PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i], get_partition_probs(cm, i),
                    vp9_partition_tree);

  /* rough estimate for costing */
  vp9_init_mode_costs(cpi);

  if (!frame_is_intra_only(cm)) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cm->allow_high_precision_mv ? cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cm->fc.nmvc,
        cm->allow_high_precision_mv, 1, 1);

    for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
      MB_PREDICTION_MODE m;

      for (m = NEARESTMV; m < MB_MODE_COUNT; m++)
        cpi->mb.inter_mode_cost[i][INTER_OFFSET(m)] =
            cost_token(vp9_inter_mode_tree,
                       cm->fc.inter_mode_probs[i],
                       &vp9_inter_mode_encodings[INTER_OFFSET(m)]);
    }
  }
}
static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
                                       const double *tab1, const double *tab2,
                                       double *v1, double *v2) {
  double y = x * inv_step;
  int d = (int) y;
  if (d >= ntab - 1) {
    *v1 = tab1[ntab - 1];
    *v2 = tab2[ntab - 1];
  } else {
    double a = y - d;
    *v1 = tab1[d] * (1 - a) + tab1[d + 1] * a;
    *v2 = tab2[d] * (1 - a) + tab2[d + 1] * a;
  }
}
static void model_rd_norm(double x, double *R, double *D) {
  static const int inv_tab_step = 8;
  static const int tab_size = 120;
  // NOTE: The tables below must be of the same size.
  //
  // Normalized rate:
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const double rate_tab[] = {
    64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
    2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
    1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
    0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
    0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
    0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
    0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
    0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
    0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
    0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
    0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
    0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
    0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
    0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
    0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
  };
  // Normalized distortion:
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer with
  // given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const double dist_tab[] = {
    0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
    0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
    0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
    0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
    0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
    0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
    0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
    0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
    0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
    0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
    0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
    0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
    0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
    0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
    0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
  };
  /*
  assert(sizeof(rate_tab) == tab_size * sizeof(rate_tab[0]));
  assert(sizeof(dist_tab) == tab_size * sizeof(dist_tab[0]));
  assert(sizeof(rate_tab) == sizeof(dist_tab));
  */
  assert(x >= 0.0);
  linear_interpolate2(x, tab_size, inv_tab_step,
                      rate_tab, dist_tab, R, D);
}
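
/* Editorial sketch: the closed forms quoted in the comments above, written
 * out so rate_tab[] and dist_tab[] can be regenerated or spot-checked.
 * Helper names are local to this example and not part of libvpx. */
static double example_binary_entropy(double p) {
  if (p <= 0.0 || p >= 1.0)
    return 0.0;
  return -p * log(p) / log(2.0) - (1.0 - p) * log(1.0 - p) / log(2.0);
}

static void example_rd_norm_closed_form(double x, double *Rn, double *Dn) {
  double r;
  if (x <= 0.0) {
    *Rn = 64.0;  /* rate_tab[0] caps the rate at the x == 0 singularity */
    *Dn = 0.0;   /* lim x->0 of Dn(x) is 0 */
    return;
  }
  r = exp(-sqrt(2.0) * x);  /* x = qpstep / sqrt(variance) */
  *Rn = example_binary_entropy(sqrt(r)) +
        sqrt(r) * (1.0 + example_binary_entropy(r) / (1.0 - r));
  *Dn = 1.0 - (x / sqrt(2.0)) / sinh(x / sqrt(2.0));
}
/* For example, x = 0.125 (the second table entry, one inv_tab_step) yields
 * Rn ~= 4.944 and Dn ~= 0.001, matching rate_tab[1] and dist_tab[1]. */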
static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  vp9_clear_system_state();
  if (var == 0 || n == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    double D, R;
    double s2 = (double) var / n;
    double x = qstep / sqrt(s2);
    model_rd_norm(x, &R, &D);
    *rate = (int)((n << 8) * R + 0.5);
    *dist = (int)(var * D + 0.5);
  }
  vp9_clear_system_state();
}
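
/* Editorial note: n is the pixel count of the block being modeled, so the
 * returned rate is R bits/pixel times n pixels, scaled by 256 to match the
 * encoder's token-cost units, while dist is the un-normalized Dn * var
 * (see the note above dist_tab[]). */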
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i, rate_sum = 0, dist_sum = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
    unsigned int sse;
    int rate;
    int64_t dist;
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, &sse);
    // sse works better than var, since there is no dc prediction used
    model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
                             pd->dequant[1] >> 3, &rate, &dist);

    rate_sum += rate;
    dist_sum += (int)dist;
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
  int j, k;
  BLOCK_SIZE bs;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int width = 4 << num_4x4_blocks_wide_lookup[bsize];
  const int height = 4 << num_4x4_blocks_high_lookup[bsize];
  int rate_sum = 0;
  int64_t dist_sum = 0;
  const int t = 4 << tx_size;

  if (tx_size == TX_4X4) {
    bs = BLOCK_4X4;
  } else if (tx_size == TX_8X8) {
    bs = BLOCK_8X8;
  } else if (tx_size == TX_16X16) {
    bs = BLOCK_16X16;
  } else if (tx_size == TX_32X32) {
    bs = BLOCK_32X32;
  } else {
    assert(0);
  }

  *out_skip = 1;
  for (j = 0; j < height; j += t) {
    for (k = 0; k < width; k += t) {
      int rate;
      int64_t dist;
      unsigned int sse;
      cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
                         &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
                         &sse);
      // sse works better than var, since there is no dc prediction used
      model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
      *out_skip &= (rate < 1024);
    }
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    int this_diff = coeff[i] - dqcoeff[i];
    error += (unsigned)this_diff * this_diff;
    sqcoeff += (unsigned) coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}
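
/* Editorial note: besides the coeff/dqcoeff error, the second output *ssz
 * is the energy of the unquantized coefficients; dist_block() below reuses
 * it as the distortion of coding the block as all zeros, so one pass yields
 * both the "coded" and the "skip" distortion. The (unsigned) casts keep the
 * 32-bit products well defined before they widen into the 64-bit sums. */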
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include the cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
 * is non-zero). */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
static INLINE int cost_coeffs(MACROBLOCK *x,
                              int plane, int block,
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
                              TX_SIZE tx_size,
                              const int16_t *scan, const int16_t *nb) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
  struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = pd->eobs[block];
  const int16_t *const qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block);
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
  unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
                   x->token_costs[tx_size][type][ref];
  const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
  uint8_t *p_tok = x->token_cache;
  int pt = combine_entropy_contexts(above_ec, left_ec);
  int c, cost;

  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y_WITH_DC ? mbmi->tx_size == tx_size
                                      : get_uv_tx_size(mbmi) == tx_size);

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][DCT_EOB_TOKEN];
    c = 0;
  } else {
    int band_left = *band_count++;

    // dc token
    int v = qcoeff_ptr[0];
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
    p_tok[0] = vp9_pt_energy_class[prev_t];
    ++token_costs;

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
      int t;

      v = qcoeff_ptr[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
      pt = get_coef_context(nb, p_tok, c);
      cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
      p_tok[rc] = vp9_pt_energy_class[t];
      prev_t = t;
      if (!--band_left) {
        band_left = *band_count++;
        ++token_costs;
      }
    }

    // eob token
    if (band_left) {
      pt = get_coef_context(nb, p_tok, c);
      cost += (*token_costs)[0][pt][DCT_EOB_TOKEN];
    }
  }
  // set the contexts to whether the eob lies beyond the first coefficient
  *A = *L = (c > 0);

  return cost;
}
static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) {
  const int ss_txfrm_size = tx_size << 1;
  struct rdcost_block_args* args = arg;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = args->tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
  args->sse  = this_sse >> shift;

  if (x->skip_encode &&
      xd->mi_8x8[0]->mbmi.ref_frame[0] == INTRA_FRAME) {
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t p = (pd->dequant[1] * pd->dequant[1] *
                    (1 << ss_txfrm_size)) >> (shift + 2);
    args->dist += (p >> 4);
    args->sse  += p;
  }
}
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
                       TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args* args = arg;

  int x_idx, y_idx;
  txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx);

  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
                           args->t_left + y_idx, args->tx_size,
                           args->scan, args->nb);
}
static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                           TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct encode_b_args encode_args = {x, NULL};
  int64_t rd1, rd2, rd;

  if (args->skip)
    return;

  if (!is_inter_block(&xd->mi_8x8[0]->mbmi))
    vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args);
  else
    vp9_xform_quant(plane, block, plane_bsize, tx_size, &encode_args);

  dist_block(plane, block, tx_size, args);
  rate_block(plane, block, plane_bsize, tx_size, args);
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = MIN(rd1, rd2);
  if (!xd->lossless && plane == 0)
    x->zcoeff_blk[tx_size][block] = rd1 > rd2 || !xd->plane[plane].eobs[block];

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse  += args->sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->skip = 1;
    return;
  }
}
void vp9_get_entropy_contexts(TX_SIZE tx_size,
    ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
    const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
    int num_4x4_w, int num_4x4_h) {
  int i;
  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = !!*(const uint16_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = !!*(const uint16_t *)&left[i];
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = !!*(const uint32_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = !!*(const uint32_t *)&left[i];
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = !!*(const uint64_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = !!*(const uint64_t *)&left[i];
      break;
    default:
      assert(!"Invalid transform size.");
  }
}
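
/* Editorial note: the casts above fold 2, 4, or 8 neighbouring 4x4 contexts
 * into one 16/32/64-bit load and test them for "any nonzero" in a single
 * compare, relying on ENTROPY_CONTEXT being one byte wide and the context
 * rows being suitably aligned. A byte-wise OR is the portable equivalent,
 * e.g. for TX_8X8: t_above[i] = !!(above[i] | above[i + 1]); */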
static void init_rdcost_stack(MACROBLOCK *x, TX_SIZE tx_size,
                              const int num_4x4_w, const int num_4x4_h,
                              const int64_t ref_rdcost,
                              struct rdcost_block_args *arg) {
  vpx_memset(arg, 0, sizeof(struct rdcost_block_args));
  arg->x = x;
  arg->tx_size = tx_size;
  arg->bw = num_4x4_w;
  arg->bh = num_4x4_h;
  arg->best_rd = ref_rdcost;
}
static void txfm_rd_in_plane(MACROBLOCK *x,
                             struct rdcost_block_args *rd_stack,
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
                             BLOCK_SIZE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_h = num_4x4_blocks_high_lookup[bs];

  init_rdcost_stack(x, tx_size, num_4x4_w, num_4x4_h,
                    ref_best_rd, rd_stack);
  if (plane == 0)
    xd->mi_8x8[0]->mbmi.tx_size = tx_size;

  vp9_get_entropy_contexts(tx_size, rd_stack->t_above, rd_stack->t_left,
                           pd->above_context, pd->left_context,
                           num_4x4_w, num_4x4_h);

  get_scan(xd, tx_size, pd->plane_type, 0, &rd_stack->scan, &rd_stack->nb);

  foreach_transformed_block_in_plane(xd, bsize, plane,
                                     block_yrd_txfm, rd_stack);
  if (rd_stack->skip) {
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
    *distortion = rd_stack->this_dist;
    *rate       = rd_stack->this_rate;
    *sse        = rd_stack->this_sse;
    *skippable  = vp9_is_skippable_in_plane(xd, bsize, plane);
  }
}
static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
                                     int64_t ref_best_rd,
                                     BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;

  mbmi->tx_size = MIN(max_tx_size, largest_tx_size);

  txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs,
                   mbmi->tx_size);
  cpi->tx_stepdown_count[0]++;
}
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int64_t *d, int64_t *distortion,
                                     int *s, int *skip,
                                     int64_t tx_cache[TX_MODES],
                                     BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
  int64_t rd[TX_SIZES][2];
  int n, m;
  int s0, s1;

  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
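
  /* Editorial note: the loop below prices the tx_size signalling as a
   * truncated unary code against tx_probs[]: one "one" bit per size level
   * skipped, plus a terminating "zero" bit unless n is already the largest
   * size allowed, in which case the terminator is implicit. */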
  for (n = TX_4X4; n <= max_tx_size; n++) {
    r[n][1] = r[n][0];
    if (r[n][0] == INT_MAX)
      continue;
    for (m = 0; m <= n - (n == max_tx_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
  }

  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
      continue;
    }
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }

  if (max_tx_size == TX_32X32 &&
      (cm->tx_mode == ALLOW_32X32 ||
       (cm->tx_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->tx_size = TX_32X32;
  } else if (max_tx_size >= TX_16X16 &&
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              (cm->tx_mode == TX_MODE_SELECT &&
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
    mbmi->tx_size = TX_16X16;
  } else if (cm->tx_mode == ALLOW_8X8 ||
             cm->tx_mode == ALLOW_16X16 ||
             cm->tx_mode == ALLOW_32X32 ||
           (cm->tx_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
    mbmi->tx_size = TX_8X8;
  } else {
    mbmi->tx_size = TX_4X4;
  }

  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];

  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
  if (max_tx_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  else if (max_tx_size >= TX_16X16 &&
           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  else
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];

  if (max_tx_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] &&
      rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[0]++;
  } else if (max_tx_size >= TX_16X16 &&
             rd[TX_16X16][1] < rd[TX_8X8][1] &&
             rd[TX_16X16][1] < rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
  } else {
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
  }
}
static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
                                          int64_t ref_best_rd,
                                          BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
  int64_t rd[TX_SIZES][2];
  int n, m;
  int s0, s1;
  double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
  // double scale_r[TX_SIZES] = {2.82, 2.00, 1.41, 1.00};

  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);

  // for (n = TX_4X4; n <= max_txfm_size; n++)
  //   r[n][0] = (r[n][0] * scale_r[n]);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_tx_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
  }

  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }
  for (n = TX_4X4; n <= max_tx_size; n++) {
    rd[n][0] = (int64_t)(scale_rd[n] * rd[n][0]);
    rd[n][1] = (int64_t)(scale_rd[n] * rd[n][1]);
  }

  if (max_tx_size == TX_32X32 &&
      (cm->tx_mode == ALLOW_32X32 ||
       (cm->tx_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] <= rd[TX_16X16][1] &&
        rd[TX_32X32][1] <= rd[TX_8X8][1] &&
        rd[TX_32X32][1] <= rd[TX_4X4][1]))) {
    mbmi->tx_size = TX_32X32;
  } else if (max_tx_size >= TX_16X16 &&
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              (cm->tx_mode == TX_MODE_SELECT &&
               rd[TX_16X16][1] <= rd[TX_8X8][1] &&
               rd[TX_16X16][1] <= rd[TX_4X4][1]))) {
    mbmi->tx_size = TX_16X16;
  } else if (cm->tx_mode == ALLOW_8X8 ||
             cm->tx_mode == ALLOW_16X16 ||
             cm->tx_mode == ALLOW_32X32 ||
           (cm->tx_mode == TX_MODE_SELECT &&
            rd[TX_8X8][1] <= rd[TX_4X4][1])) {
    mbmi->tx_size = TX_8X8;
  } else {
    mbmi->tx_size = TX_4X4;
  }

  // Actually encode using the chosen mode if a model was used, but do not
  // update the r, d costs
  txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size);

  if (max_tx_size == TX_32X32 &&
      rd[TX_32X32][1] <= rd[TX_16X16][1] &&
      rd[TX_32X32][1] <= rd[TX_8X8][1] &&
      rd[TX_32X32][1] <= rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[0]++;
  } else if (max_tx_size >= TX_16X16 &&
             rd[TX_16X16][1] <= rd[TX_8X8][1] &&
             rd[TX_16X16][1] <= rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
  } else {
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
  }
}
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int64_t *distortion,
                            int *skip, int64_t *psse, BLOCK_SIZE bs,
                            int64_t txfm_cache[TX_MODES],
                            int64_t ref_best_rd) {
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  struct rdcost_block_args *rdcost_stack = &cpi->rdcost_stack;
  const int b_inter_mode = is_inter_block(mbmi);

  assert(bs == mbmi->sb_type);
  if (b_inter_mode)
    vp9_subtract_sby(x, bs);

  if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
      (cpi->sf.tx_size_search_method != USE_FULL_RD &&
       !b_inter_mode)) {
    vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
    choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
                             ref_best_rd, bs);
    if (psse)
      *psse = sse[mbmi->tx_size];
    return;
  }

  if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER &&
      b_inter_mode) {
    if (bs >= BLOCK_32X32)
      model_rd_for_sb_y_tx(cpi, bs, TX_32X32, x, xd,
                           &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]);
    if (bs >= BLOCK_16X16)
      model_rd_for_sb_y_tx(cpi, bs, TX_16X16, x, xd,
                           &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]);

    model_rd_for_sb_y_tx(cpi, bs, TX_8X8, x, xd,
                         &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]);

    model_rd_for_sb_y_tx(cpi, bs, TX_4X4, x, xd,
                         &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]);

    choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
                                  skip, sse, ref_best_rd, bs);
  } else {
    if (bs >= BLOCK_32X32)
      txfm_rd_in_plane(x, rdcost_stack, &r[TX_32X32][0], &d[TX_32X32],
                       &s[TX_32X32], &sse[TX_32X32],
                       ref_best_rd, 0, bs, TX_32X32);
    if (bs >= BLOCK_16X16)
      txfm_rd_in_plane(x, rdcost_stack, &r[TX_16X16][0], &d[TX_16X16],
                       &s[TX_16X16], &sse[TX_16X16],
                       ref_best_rd, 0, bs, TX_16X16);
    txfm_rd_in_plane(x, rdcost_stack, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8],
                     &sse[TX_8X8], ref_best_rd, 0, bs, TX_8X8);
    txfm_rd_in_plane(x, rdcost_stack, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4],
                     &sse[TX_4X4], ref_best_rd, 0, bs, TX_4X4);
    choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
                             skip, txfm_cache, bs);
  }
  if (psse)
    *psse = sse[mbmi->tx_size];
}
static int conditional_skipintra(MB_PREDICTION_MODE mode,
                                 MB_PREDICTION_MODE best_intra_mode) {
  if (mode == D117_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  if (mode == D63_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D207_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D153_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  return 0;
}
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     MB_PREDICTION_MODE *best_mode,
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int64_t *bestdistortion,
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
  MB_PREDICTION_MODE mode;
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t best_rd = rd_thresh;
  int rate = 0;
  int64_t distortion;
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  uint8_t *src_init = raster_block_offset_uint8(BLOCK_8X8, ib,
                                                p->src.buf, src_stride);
  uint8_t *dst_init = raster_block_offset_uint8(BLOCK_8X8, ib,
                                                pd->dst.buf, dst_stride);
  int16_t *src_diff, *coeff;

  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];

  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  uint8_t best_dst[8 * 8];

  assert(ib < 4);

  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
  xd->mi_8x8[0]->mbmi.tx_size = TX_4X4;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;

    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
      continue;

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
          continue;
    }

    rate = bmode_costs[mode];
    distortion = 0;

    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
        int64_t ssz;
        const int16_t *scan;
        const int16_t *nb;
        uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride;
        uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride;
        const int block = ib + idy * 2 + idx;
        TX_TYPE tx_type;
        xd->mi_8x8[0]->bmi[block].as_mode = mode;
        src_diff = raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
        coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
        vp9_predict_intra_block(xd, block, 1,
                                TX_4X4, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride);
        vp9_subtract_block(4, 4, src_diff, 8,
                           src, src_stride,
                           dst, dst_stride);

        tx_type = get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block);
        get_scan_nb_4x4(tx_type, &scan, &nb);

        if (tx_type != DCT_DCT)
          vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
        else
          x->fwd_txm4x4(src_diff, coeff, 8);

        vp9_regular_quantize_b_4x4(x, 4, block, scan, get_iscan_4x4(tx_type));

        ratey += cost_coeffs(x, 0, block,
                             tempa + idx, templ + idy, TX_4X4, scan, nb);
        distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                      16, &ssz) >> 2;
        if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
          goto next;

        if (tx_type != DCT_DCT)
          vp9_iht4x4_16_add(BLOCK_OFFSET(pd->dqcoeff, block),
                               dst, pd->dst.stride, tx_type);
        else
          xd->itxm_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, pd->dst.stride,
                       16);
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
                   num_4x4_blocks_wide * 4);
    }
  next:
    {}
  }

  if (best_rd >= rd_thresh || x->skip_encode)
    return best_rd;

  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
    vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
               num_4x4_blocks_wide * 4);

  return best_rd;
}
static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi,
                                            MACROBLOCK * const mb,
                                            int * const rate,
                                            int * const rate_y,
                                            int64_t * const distortion,
                                            int64_t best_rd) {
  int i, j;
  MACROBLOCKD *const xd = &mb->e_mbd;
  MODE_INFO *const mic = xd->mi_8x8[0];
  const MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
  const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL;
  const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  int cost = 0;
  int64_t total_distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  ENTROPY_CONTEXT t_above[4], t_left[4];
  int *bmode_costs;

  vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));

  bmode_costs = mb->mbmode_cost;

  // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      MB_PREDICTION_MODE best_mode = DC_PRED;
      int r = INT_MAX, ry = INT_MAX;
      int64_t d = INT64_MAX, this_rd = INT64_MAX;
      i = idy * 2 + idx;
      if (cpi->common.frame_type == KEY_FRAME) {
        const MB_PREDICTION_MODE A = above_block_mode(mic, above_mi, i);
        const MB_PREDICTION_MODE L = left_block_mode(mic, left_mi, i);

        bmode_costs  = mb->y_mode_costs[A][L];
      }

      this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
                                      t_above + idx, t_left + idy, &r, &ry, &d,
                                      bsize, best_rd - total_rd);
      if (this_rd >= best_rd - total_rd)
        return INT64_MAX;

      total_rd += this_rd;
      cost += r;
      total_distortion += d;
      tot_rate_y += ry;

      mic->bmi[i].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_high; ++j)
        mic->bmi[i + j * 2].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_wide; ++j)
        mic->bmi[i + j].as_mode = best_mode;

      if (total_rd >= best_rd)
        return INT64_MAX;
    }
  }

  *rate = cost;
  *rate_y = tot_rate_y;
  *distortion = total_distortion;
  mic->mbmi.mode = mic->bmi[3].as_mode;

  return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}
static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int64_t *distortion, int *skippable,
                                      BLOCK_SIZE bsize,
                                      int64_t tx_cache[TX_MODES],
                                      int64_t best_rd) {
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE mode_selected = DC_PRED;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi_8x8[0];
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion, this_rd;
  TX_SIZE best_tx = TX_4X4;
  int i;
  int *bmode_costs = x->mbmode_cost;

  if (cpi->sf.tx_size_search_method == USE_FULL_RD)
    for (i = 0; i < TX_MODES; i++)
      tx_cache[i] = INT64_MAX;

  /* Y Search for intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    int64_t local_tx_cache[TX_MODES];
    MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
    MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL;

    if (!(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]] & (1 << mode)))
      continue;

    if (cpi->common.frame_type == KEY_FRAME) {
      const MB_PREDICTION_MODE A = above_block_mode(mic, above_mi, 0);
      const MB_PREDICTION_MODE L = left_block_mode(mic, left_mi, 0);

      bmode_costs = x->y_mode_costs[A][L];
    }
    mic->mbmi.mode = mode;

    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
                    bsize, local_tx_cache, best_rd);

    if (this_rate_tokenonly == INT_MAX)
      continue;

    this_rate = this_rate_tokenonly + bmode_costs[mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected   = mode;
      best_rd         = this_rd;
      best_tx         = mic->mbmi.tx_size;
      *rate           = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion     = this_distortion;
      *skippable      = s;
    }

    if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
      for (i = 0; i < TX_MODES && local_tx_cache[i] < INT64_MAX; i++) {
        const int64_t adj_rd = this_rd + local_tx_cache[i] -
            local_tx_cache[cpi->common.tx_mode];
        if (adj_rd < tx_cache[i]) {
          tx_cache[i] = adj_rd;
        }
      }
    }
  }

  mic->mbmi.mode = mode_selected;
  mic->mbmi.tx_size = best_tx;

  return best_rd;
}
static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x,
                             int *rate, int64_t *distortion, int *skippable,
                             int64_t *sse, BLOCK_SIZE bsize,
                             int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
  int plane;
  int pnrate = 0, pnskip = 1;
  int64_t pndist = 0, pnsse = 0;

  if (ref_best_rd < 0)
    goto term;

  if (is_inter_block(mbmi))
    vp9_subtract_sbuv(x, bsize);

  *rate = 0;
  *distortion = 0;
  *sse = 0;
  *skippable = 1;

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    txfm_rd_in_plane(x, &cpi->rdcost_stack, &pnrate, &pndist, &pnskip, &pnsse,
                     ref_best_rd, plane, bsize, uv_txfm_size);
    if (pnrate == INT_MAX)
      goto term;
    *rate += pnrate;
    *distortion += pndist;
    *sse += pnsse;
    *skippable &= pnskip;
  }
  return;

  term:
  *rate = INT_MAX;
  *distortion = INT64_MAX;
  *sse = INT64_MAX;
  *skippable = 0;
  return;
}
  1332 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
  1333                                        PICK_MODE_CONTEXT *ctx,
  1334                                        int *rate, int *rate_tokenonly,
  1335                                        int64_t *distortion, int *skippable,
  1336                                        BLOCK_SIZE bsize) {
  1337   MB_PREDICTION_MODE mode;
  1338   MB_PREDICTION_MODE mode_selected = DC_PRED;
  1339   int64_t best_rd = INT64_MAX, this_rd;
  1340   int this_rate_tokenonly, this_rate, s;
  1341   int64_t this_distortion, this_sse;
  1343   // int mode_mask = (bsize <= BLOCK_8X8)
  1344   //                ? ALL_INTRA_MODES : cpi->sf.intra_uv_mode_mask;
  1346   for (mode = DC_PRED; mode <= TM_PRED; mode ++) {
  1347     // if (!(mode_mask & (1 << mode)))
  1348     if (!(cpi->sf.intra_uv_mode_mask[max_uv_txsize_lookup[bsize]]
  1349           & (1 << mode)))
  1350       continue;
  1352     x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode;
  1354     super_block_uvrd(cpi, x, &this_rate_tokenonly,
  1355                      &this_distortion, &s, &this_sse, bsize, best_rd);
  1356     if (this_rate_tokenonly == INT_MAX)
  1357       continue;
  1358     this_rate = this_rate_tokenonly +
  1359                 x->intra_uv_mode_cost[cpi->common.frame_type][mode];
  1360     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
  1362     if (this_rd < best_rd) {
  1363       mode_selected   = mode;
  1364       best_rd         = this_rd;
  1365       *rate           = this_rate;
  1366       *rate_tokenonly = this_rate_tokenonly;
  1367       *distortion     = this_distortion;
  1368       *skippable      = s;
  1369       if (!x->select_txfm_size) {
  1370         int i;
  1371         struct macroblock_plane *const p = x->plane;
  1372         struct macroblockd_plane *const pd = x->e_mbd.plane;
  1373         for (i = 1; i < MAX_MB_PLANE; ++i) {
  1374           p[i].coeff    = ctx->coeff_pbuf[i][2];
  1375           pd[i].qcoeff  = ctx->qcoeff_pbuf[i][2];
  1376           pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
  1377           pd[i].eobs    = ctx->eobs_pbuf[i][2];
  1379           ctx->coeff_pbuf[i][2]   = ctx->coeff_pbuf[i][0];
  1380           ctx->qcoeff_pbuf[i][2]  = ctx->qcoeff_pbuf[i][0];
  1381           ctx->dqcoeff_pbuf[i][2] = ctx->dqcoeff_pbuf[i][0];
  1382           ctx->eobs_pbuf[i][2]    = ctx->eobs_pbuf[i][0];
  1384           ctx->coeff_pbuf[i][0]   = p[i].coeff;
  1385           ctx->qcoeff_pbuf[i][0]  = pd[i].qcoeff;
  1386           ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
  1387           ctx->eobs_pbuf[i][0]    = pd[i].eobs;
  1388         }
  1389       }
  1390     }
  1391   }
  1393   x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode_selected;
  1395   return best_rd;
  1396 }
  1398 static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x,
  1399                               int *rate, int *rate_tokenonly,
  1400                               int64_t *distortion, int *skippable,
  1401                               BLOCK_SIZE bsize) {
  1402   int64_t this_rd;
  1403   int64_t this_sse;
  1405   x->e_mbd.mi_8x8[0]->mbmi.uv_mode = DC_PRED;
  1406   super_block_uvrd(cpi, x, rate_tokenonly, distortion,
  1407                    skippable, &this_sse, bsize, INT64_MAX);
  1408   *rate = *rate_tokenonly +
  1409           x->intra_uv_mode_cost[cpi->common.frame_type][DC_PRED];
  1410   this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
  1412   return this_rd;
  1413 }
  1415 static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
  1416                                  BLOCK_SIZE bsize, int *rate_uv,
  1417                                  int *rate_uv_tokenonly,
  1418                                  int64_t *dist_uv, int *skip_uv,
  1419                                  MB_PREDICTION_MODE *mode_uv) {
  1420   MACROBLOCK *const x = &cpi->mb;
  1422   // Use an estimated rd for uv_intra based on DC_PRED if the
  1423   // appropriate speed flag is set.
  1424   if (cpi->sf.use_uv_intra_rd_estimate) {
  1425     rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
  1426                    bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
  1427   // Else do a proper rd search for each possible transform size that may
  1428   // be considered in the main rd loop.
  1429   } else {
  1430     rd_pick_intra_sbuv_mode(cpi, x, ctx,
  1431                             rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
  1432                             bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
  1433   }
  1434   *mode_uv = x->e_mbd.mi_8x8[0]->mbmi.uv_mode;
  1435 }
  1437 static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
  1438                        int mode_context) {
  1439   MACROBLOCK *const x = &cpi->mb;
  1440   MACROBLOCKD *const xd = &x->e_mbd;
  1441   const int segment_id = xd->mi_8x8[0]->mbmi.segment_id;
  1443   // Don't account for mode here if segment skip is enabled.
  1444   if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
  1445     assert(is_inter_mode(mode));
  1446     return x->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
  1447   } else {
  1448     return 0;
  1449   }
  1450 }
  1452 void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
  1453   x->e_mbd.mi_8x8[0]->mbmi.mode = mb;
  1454   x->e_mbd.mi_8x8[0]->mbmi.mv[0].as_int = mv->as_int;
  1455 }
  1457 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
  1458                                 BLOCK_SIZE bsize,
  1459                                 int_mv *frame_mv,
  1460                                 int mi_row, int mi_col,
  1461                                 int_mv single_newmv[MAX_REF_FRAMES],
  1462                                 int *rate_mv);
  1464 static int labels2mode(MACROBLOCK *x, int i,
  1465                        MB_PREDICTION_MODE this_mode,
  1466                        int_mv *this_mv, int_mv *this_second_mv,
  1467                        int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
  1468                        int_mv seg_mvs[MAX_REF_FRAMES],
  1469                        int_mv *best_ref_mv,
  1470                        int_mv *second_best_ref_mv,
  1471                        int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
  1472   MACROBLOCKD *const xd = &x->e_mbd;
  1473   MODE_INFO *const mic = xd->mi_8x8[0];
  1474   MB_MODE_INFO *mbmi = &mic->mbmi;
  1475   int cost = 0, thismvcost = 0;
  1476   int idx, idy;
  1477   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
  1478   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
  1479   const int has_second_rf = has_second_ref(mbmi);
  1481   /* We must be careful when retrieving previously encoded motion vectors:
  1482    * those from this macroblock have to be pulled from the BLOCKD array, as
  1483    * they have not yet been copied to the bmi array in our MB_MODE_INFO. */
  1484   MB_PREDICTION_MODE m;
  1486   // The only time we should do costing for a new motion vector or mode
  1487   // is when we are on a new label  (jbb May 08, 2007)
  1488   switch (m = this_mode) {
  1489     case NEWMV:
  1490       this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
  1491       thismvcost  = vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv,
  1492                                     mvjcost, mvcost, MV_COST_WEIGHT_SUB);
  1493       if (has_second_rf) {
  1494         this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
  1495         thismvcost += vp9_mv_bit_cost(&this_second_mv->as_mv,
  1496                                       &second_best_ref_mv->as_mv,
  1497                                       mvjcost, mvcost, MV_COST_WEIGHT_SUB);
  1498       }
  1499       break;
  1500     case NEARESTMV:
  1501       this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
  1502       if (has_second_rf)
  1503         this_second_mv->as_int =
  1504             frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
  1505       break;
  1506     case NEARMV:
  1507       this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
  1508       if (has_second_rf)
  1509         this_second_mv->as_int =
  1510             frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
  1511       break;
  1512     case ZEROMV:
  1513       this_mv->as_int = 0;
  1514       if (has_second_rf)
  1515         this_second_mv->as_int = 0;
  1516       break;
  1517     default:
  1518       break;
  1519   }
  1521   cost = cost_mv_ref(cpi, this_mode,
  1522                      mbmi->mode_context[mbmi->ref_frame[0]]);
  1524   mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
  1525   if (has_second_rf)
  1526     mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;
  1528   mic->bmi[i].as_mode = m;
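
         // A sub-8x8 partition label can cover more than one 4x4 unit (two for
         // 4x8 and 8x4). Replicate this label's mode/mv into every 4x4
         // position of the 2x2 bmi grid that it spans.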
  1530   for (idy = 0; idy < num_4x4_blocks_high; ++idy)
  1531     for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
  1532       vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
  1533                  &mic->bmi[i], sizeof(mic->bmi[i]));
  1535   cost += thismvcost;
  1536   return cost;
  1537 }
  1539 static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
  1540                                        MACROBLOCK *x,
  1541                                        int64_t best_yrd,
  1542                                        int i,
  1543                                        int *labelyrate,
  1544                                        int64_t *distortion, int64_t *sse,
  1545                                        ENTROPY_CONTEXT *ta,
  1546                                        ENTROPY_CONTEXT *tl) {
  1547   int k;
  1548   MACROBLOCKD *xd = &x->e_mbd;
  1549   struct macroblockd_plane *const pd = &xd->plane[0];
  1550   struct macroblock_plane *const p = &x->plane[0];
  1551   MODE_INFO *const mi = xd->mi_8x8[0];
  1552   const BLOCK_SIZE bsize = mi->mbmi.sb_type;
  1553   const int width = plane_block_width(bsize, pd);
  1554   const int height = plane_block_height(bsize, pd);
  1555   int idx, idy;
  1557   uint8_t *const src = raster_block_offset_uint8(BLOCK_8X8, i,
  1558                                                  p->src.buf, p->src.stride);
  1559   uint8_t *const dst = raster_block_offset_uint8(BLOCK_8X8, i,
  1560                                                  pd->dst.buf, pd->dst.stride);
  1561   int64_t thisdistortion = 0, thissse = 0;
  1562   int thisrate = 0, ref;
  1563   const int is_compound = has_second_ref(&mi->mbmi);
  1564   for (ref = 0; ref < 1 + is_compound; ++ref) {
  1565     const uint8_t *pre = raster_block_offset_uint8(BLOCK_8X8, i,
  1566                                      pd->pre[ref].buf, pd->pre[ref].stride);
  1567     vp9_build_inter_predictor(pre, pd->pre[ref].stride,
  1568                               dst, pd->dst.stride,
  1569                               &mi->bmi[i].as_mv[ref].as_mv,
  1570                               &xd->scale_factor[ref],
  1571                               width, height, ref, &xd->subpix, MV_PRECISION_Q3);
  1574   vp9_subtract_block(height, width,
  1575                      raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
  1576                      src, p->src.stride,
  1577                      dst, pd->dst.stride);
  1579   k = i;
  1580   for (idy = 0; idy < height / 4; ++idy) {
  1581     for (idx = 0; idx < width / 4; ++idx) {
  1582       int64_t ssz, rd, rd1, rd2;
  1583       int16_t* coeff;
  1585       k += (idy * 2 + idx);
  1586       coeff = BLOCK_OFFSET(p->coeff, k);
  1587       x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
  1588                     coeff, 8);
  1589       vp9_regular_quantize_b_4x4(x, 4, k, get_scan_4x4(DCT_DCT),
  1590                                  get_iscan_4x4(DCT_DCT));
  1591       thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
  1592                                         16, &ssz);
  1593       thissse += ssz;
  1594       thisrate += cost_coeffs(x, 0, k,
  1595                               ta + (k & 1),
  1596                               tl + (k >> 1), TX_4X4,
  1597                               vp9_default_scan_4x4,
  1598                               vp9_default_scan_4x4_neighbors);
  1599       rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
  1600       rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
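             // rd1 is the cost of coding the coefficients; rd2 is the cost of
             // skipping them (zero rate, distortion equal to the residual
             // energy). Their minimum is a lower bound on this block's final
             // cost, used for early termination against best_yrd.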
  1601       rd = MIN(rd1, rd2);
  1602       if (rd >= best_yrd)
  1603         return INT64_MAX;
  1604     }
  1605   }
  1607   *distortion = thisdistortion >> 2;
  1608   *labelyrate = thisrate;
  1609   *sse = thissse >> 2;
  1611   return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
  1612 }
  1614 typedef struct {
  1615   int eobs;
  1616   int brate;
  1617   int byrate;
  1618   int64_t bdist;
  1619   int64_t bsse;
  1620   int64_t brdcost;
  1621   int_mv mvs[2];
  1622   ENTROPY_CONTEXT ta[2];
  1623   ENTROPY_CONTEXT tl[2];
  1624 } SEG_RDSTAT;
  1626 typedef struct {
  1627   int_mv *ref_mv, *second_ref_mv;
  1628   int_mv mvp;
  1630   int64_t segment_rd;
  1631   int r;
  1632   int64_t d;
  1633   int64_t sse;
  1634   int segment_yrate;
  1635   MB_PREDICTION_MODE modes[4];
  1636   SEG_RDSTAT rdstat[4][INTER_MODES];
  1637   int mvthresh;
  1638 } BEST_SEG_INFO;
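
       // rdstat is indexed as [4x4 label][INTER_OFFSET(mode)]; one
       // BEST_SEG_INFO is kept per interpolation filter (see the
       // bsi_buf/filter_idx arguments below) so later filter passes can reuse
       // results from earlier ones.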
  1640 static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
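         // Motion vectors are stored in 1/8-pel units; >> 3 converts to whole
         // pixels before comparing against the search range limits.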
  1641   int r = 0;
  1642   r |= (mv->as_mv.row >> 3) < x->mv_row_min;
  1643   r |= (mv->as_mv.row >> 3) > x->mv_row_max;
  1644   r |= (mv->as_mv.col >> 3) < x->mv_col_min;
  1645   r |= (mv->as_mv.col >> 3) > x->mv_col_max;
  1646   return r;
  1647 }
  1649 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
  1650   MB_MODE_INFO *const mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
  1651   struct macroblock_plane *const p = &x->plane[0];
  1652   struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
  1654   p->src.buf = raster_block_offset_uint8(BLOCK_8X8, i, p->src.buf,
  1655                                          p->src.stride);
  1656   assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
  1657   pd->pre[0].buf = raster_block_offset_uint8(BLOCK_8X8, i, pd->pre[0].buf,
  1658                                              pd->pre[0].stride);
  1659   if (has_second_ref(mbmi))
  1660     pd->pre[1].buf = raster_block_offset_uint8(BLOCK_8X8, i, pd->pre[1].buf,
  1661                                                pd->pre[1].stride);
  1662 }
  1664 static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
  1665                                   struct buf_2d orig_pre[2]) {
  1666   MB_MODE_INFO *mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
  1667   x->plane[0].src = orig_src;
  1668   x->e_mbd.plane[0].pre[0] = orig_pre[0];
  1669   if (has_second_ref(mbmi))
  1670     x->e_mbd.plane[0].pre[1] = orig_pre[1];
  1671 }
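
       // The mi_buf_shift()/mi_buf_restore() pair above lets the whole-block
       // motion search code operate on one sub-8x8 block: shift repoints the
       // source and prediction buffers at the i-th 4x4 raster position, and
       // restore puts the original pointers back afterwards.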
  1673 static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
  1674                                     const TileInfo *const tile,
  1675                                     BEST_SEG_INFO *bsi_buf, int filter_idx,
  1676                                     int_mv seg_mvs[4][MAX_REF_FRAMES],
  1677                                     int mi_row, int mi_col) {
  1678   int i, br = 0, idx, idy;
  1679   int64_t bd = 0, block_sse = 0;
  1680   MB_PREDICTION_MODE this_mode;
  1681   MODE_INFO *mi = x->e_mbd.mi_8x8[0];
  1682   MB_MODE_INFO *const mbmi = &mi->mbmi;
  1683   struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
  1684   const int label_count = 4;
  1685   int64_t this_segment_rd = 0;
  1686   int label_mv_thresh;
  1687   int segmentyrate = 0;
  1688   const BLOCK_SIZE bsize = mbmi->sb_type;
  1689   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  1690   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  1691   vp9_variance_fn_ptr_t *v_fn_ptr;
  1692   ENTROPY_CONTEXT t_above[2], t_left[2];
  1693   BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
  1694   int mode_idx;
  1695   int subpelmv = 1, have_ref = 0;
  1696   const int has_second_rf = has_second_ref(mbmi);
  1698   vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
  1699   vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
  1701   v_fn_ptr = &cpi->fn_ptr[bsize];
  1703   // A factor of 64 would make this threshold really big, effectively
  1704   // making it so that we very rarely check mvs on segments. Setting the
  1705   // factor to 1 makes the mv threshold roughly equal to what it is for
  1706   // macroblocks.
  1707   label_mv_thresh = 1 * bsi->mvthresh / label_count;
  1709   // Segmentation method overheads
  1710   for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
  1711     for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
  1712       // TODO(jingning,rbultje): rewrite the rate-distortion optimization
  1713       // loop for 4x4/4x8/8x4 block coding; to be replaced with a new rd loop.
  1714       int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
  1715       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  1716       MB_PREDICTION_MODE mode_selected = ZEROMV;
  1717       int64_t best_rd = INT64_MAX;
  1718       i = idy * 2 + idx;
  1720       frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0;
  1721       vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, tile,
  1722                                     &frame_mv[NEARESTMV][mbmi->ref_frame[0]],
  1723                                     &frame_mv[NEARMV][mbmi->ref_frame[0]],
  1724                                     i, 0, mi_row, mi_col);
  1725       if (has_second_rf) {
  1726         frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0;
  1727         vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, tile,
  1728                                       &frame_mv[NEARESTMV][mbmi->ref_frame[1]],
  1729                                       &frame_mv[NEARMV][mbmi->ref_frame[1]],
  1730                                       i, 1, mi_row, mi_col);
  1731       }
  1732       // search for the best motion vector on this segment
  1733       for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
  1734         const struct buf_2d orig_src = x->plane[0].src;
  1735         struct buf_2d orig_pre[2];
  1737         mode_idx = INTER_OFFSET(this_mode);
  1738         bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
  1740         // If we're in a near/nearest mode and the mv is 0,0, compare to ZEROMV.
  1741         if ((this_mode == NEARMV || this_mode == NEARESTMV ||
  1742              this_mode == ZEROMV) &&
  1743             frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 &&
  1744             (!has_second_rf ||
  1745              frame_mv[this_mode][mbmi->ref_frame[1]].as_int == 0)) {
  1746           int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
  1747           int c1 = cost_mv_ref(cpi, NEARMV, rfc);
  1748           int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
  1749           int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
  1751           if (this_mode == NEARMV) {
  1752             if (c1 > c3)
  1753               continue;
  1754           } else if (this_mode == NEARESTMV) {
  1755             if (c2 > c3)
  1756               continue;
  1757           } else {
  1758             assert(this_mode == ZEROMV);
  1759             if (!has_second_rf) {
  1760               if ((c3 >= c2 &&
  1761                    frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
  1762                   (c3 >= c1 &&
  1763                    frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
  1764                 continue;
  1765             } else {
  1766               if ((c3 >= c2 &&
  1767                    frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
  1768                    frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
  1769                   (c3 >= c1 &&
  1770                    frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
  1771                    frame_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
  1772                 continue;
  1773             }
  1774           }
  1775         }
  1777         vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
  1778         vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
  1779                    sizeof(bsi->rdstat[i][mode_idx].ta));
  1780         vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
  1781                    sizeof(bsi->rdstat[i][mode_idx].tl));
  1783         // motion search for newmv (single predictor case only)
  1784         if (!has_second_rf && this_mode == NEWMV &&
  1785             seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
  1786           int step_param = 0;
  1787           int further_steps;
  1788           int thissme, bestsme = INT_MAX;
  1789           int sadpb = x->sadperbit4;
  1790           int_mv mvp_full;
  1791           int max_mv;
  1793           /* Is the best so far sufficiently good that we can't justify
  1794            * doing a new motion search? */
  1795           if (best_rd < label_mv_thresh)
  1796             break;
  1798           if (cpi->compressor_speed) {
  1799             // use previous block's result as next block's MV predictor.
  1800             if (i > 0) {
  1801               bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
  1802               if (i == 2)
  1803                 bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
  1804             }
  1805           }
  1806           if (i == 0)
  1807             max_mv = x->max_mv_context[mbmi->ref_frame[0]];
  1808           else
  1809             max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
  1811           if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
  1812             // Take a weighted average of the step_params based on the last
  1813             // frame's max mv magnitude and the best ref mvs of the current
  1814             // block for the given reference.
  1815             step_param = (vp9_init_search_range(cpi, max_mv) +
  1816                           cpi->mv_step_param) >> 1;
  1817           } else {
  1818             step_param = cpi->mv_step_param;
  1819           }
  1821           mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
  1822           mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
  1824           if (cpi->sf.adaptive_motion_search && cpi->common.show_frame) {
  1825             mvp_full.as_mv.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3;
  1826             mvp_full.as_mv.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3;
  1827             step_param = MAX(step_param, 8);
  1828           }
  1830           further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
  1831           // adjust src pointer for this block
  1832           mi_buf_shift(x, i);
  1833           if (cpi->sf.search_method == HEX) {
  1834             bestsme = vp9_hex_search(x, &mvp_full.as_mv,
  1835                                      step_param,
  1836                                      sadpb, 1, v_fn_ptr, 1,
  1837                                      &bsi->ref_mv->as_mv,
  1838                                      &mode_mv[NEWMV].as_mv);
  1839           } else if (cpi->sf.search_method == SQUARE) {
  1840             bestsme = vp9_square_search(x, &mvp_full.as_mv,
  1841                                         step_param,
  1842                                         sadpb, 1, v_fn_ptr, 1,
  1843                                         &bsi->ref_mv->as_mv,
  1844                                         &mode_mv[NEWMV].as_mv);
  1845           } else if (cpi->sf.search_method == BIGDIA) {
  1846             bestsme = vp9_bigdia_search(x, &mvp_full.as_mv,
  1847                                         step_param,
  1848                                         sadpb, 1, v_fn_ptr, 1,
  1849                                         &bsi->ref_mv->as_mv,
  1850                                         &mode_mv[NEWMV].as_mv);
  1851           } else {
  1852             bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
  1853                                              sadpb, further_steps, 0, v_fn_ptr,
  1854                                              bsi->ref_mv, &mode_mv[NEWMV]);
  1855           }
  1857           // Should we do a full search (best quality only)?
  1858           if (cpi->compressor_speed == 0) {
  1859             /* Check if mvp_full is within the range. */
  1860             clamp_mv(&mvp_full.as_mv, x->mv_col_min, x->mv_col_max,
  1861                      x->mv_row_min, x->mv_row_max);
  1863             thissme = cpi->full_search_sad(x, &mvp_full,
  1864                                            sadpb, 16, v_fn_ptr,
  1865                                            x->nmvjointcost, x->mvcost,
  1866                                            bsi->ref_mv, i);
  1868             if (thissme < bestsme) {
  1869               bestsme = thissme;
  1870               mode_mv[NEWMV].as_int = mi->bmi[i].as_mv[0].as_int;
  1871             } else {
  1872               /* The full search result is actually worse, so reinstate the
  1873                * previous best vector. */
  1874               mi->bmi[i].as_mv[0].as_int = mode_mv[NEWMV].as_int;
  1875             }
  1876           }
  1878           if (bestsme < INT_MAX) {
  1879             int distortion;
  1880             unsigned int sse;
  1881             cpi->find_fractional_mv_step(x,
  1882                                          &mode_mv[NEWMV].as_mv,
  1883                                          &bsi->ref_mv->as_mv,
  1884                                          cpi->common.allow_high_precision_mv,
  1885                                          x->errorperbit, v_fn_ptr,
  1886                                          0, cpi->sf.subpel_iters_per_step,
  1887                                          x->nmvjointcost, x->mvcost,
  1888                                          &distortion, &sse);
  1890             // save motion search result for use in compound prediction
  1891             seg_mvs[i][mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int;
  1892           }
  1894           if (cpi->sf.adaptive_motion_search)
  1895             x->pred_mv[mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int;
  1897           // restore src pointers
  1898           mi_buf_restore(x, orig_src, orig_pre);
  1899         }
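
               // At this point the NEWMV candidate for this label has been
               // found by a full-pel search (hex/square/bigdia/diamond,
               // depending on sf.search_method) plus sub-pel refinement, and
               // cached in seg_mvs for reuse by the compound path below.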
  1901         if (has_second_rf) {
  1902           if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
  1903               seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
  1904             continue;
  1905         }
  1907         if (has_second_rf && this_mode == NEWMV &&
  1908             mbmi->interp_filter == EIGHTTAP) {
  1909           // adjust src pointers
  1910           mi_buf_shift(x, i);
  1911           if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
  1912             int rate_mv;
  1913             joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
  1914                                 mi_row, mi_col, seg_mvs[i],
  1915                                 &rate_mv);
  1916             seg_mvs[i][mbmi->ref_frame[0]].as_int =
  1917                 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
  1918             seg_mvs[i][mbmi->ref_frame[1]].as_int =
  1919                 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
  1920           }
  1921           // restore src pointers
  1922           mi_buf_restore(x, orig_src, orig_pre);
  1923         }
  1925         bsi->rdstat[i][mode_idx].brate =
  1926             labels2mode(x, i, this_mode, &mode_mv[this_mode],
  1927                         &second_mode_mv[this_mode], frame_mv, seg_mvs[i],
  1928                         bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
  1929                         x->mvcost, cpi);
  1932         bsi->rdstat[i][mode_idx].mvs[0].as_int = mode_mv[this_mode].as_int;
  1933         if (num_4x4_blocks_wide > 1)
  1934           bsi->rdstat[i + 1][mode_idx].mvs[0].as_int =
  1935               mode_mv[this_mode].as_int;
  1936         if (num_4x4_blocks_high > 1)
  1937           bsi->rdstat[i + 2][mode_idx].mvs[0].as_int =
  1938               mode_mv[this_mode].as_int;
  1939         if (has_second_rf) {
  1940           bsi->rdstat[i][mode_idx].mvs[1].as_int =
  1941               second_mode_mv[this_mode].as_int;
  1942           if (num_4x4_blocks_wide > 1)
  1943             bsi->rdstat[i + 1][mode_idx].mvs[1].as_int =
  1944                 second_mode_mv[this_mode].as_int;
  1945           if (num_4x4_blocks_high > 1)
  1946             bsi->rdstat[i + 2][mode_idx].mvs[1].as_int =
  1947                 second_mode_mv[this_mode].as_int;
  1948         }
  1950         // Trap vectors that reach beyond the UMV borders
  1951         if (mv_check_bounds(x, &mode_mv[this_mode]))
  1952           continue;
  1953         if (has_second_rf &&
  1954             mv_check_bounds(x, &second_mode_mv[this_mode]))
  1955           continue;
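
               // For filter passes after the first (filter_idx > 0): a block
               // whose MVs have no sub-pel component produces the same
               // prediction regardless of the interpolation filter, so rd
               // stats computed for an earlier filter with identical MVs can
               // simply be copied below instead of being re-encoded.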
  1957         if (filter_idx > 0) {
  1958           BEST_SEG_INFO *ref_bsi = bsi_buf;
  1959           subpelmv = (mode_mv[this_mode].as_mv.row & 0x0f) ||
  1960                      (mode_mv[this_mode].as_mv.col & 0x0f);
  1961           have_ref = mode_mv[this_mode].as_int ==
  1962                      ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
  1963           if (has_second_rf) {
  1964             subpelmv |= (second_mode_mv[this_mode].as_mv.row & 0x0f) ||
  1965                         (second_mode_mv[this_mode].as_mv.col & 0x0f);
  1966             have_ref  &= second_mode_mv[this_mode].as_int ==
  1967                          ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
  1968           }
  1970           if (filter_idx > 1 && !subpelmv && !have_ref) {
  1971             ref_bsi = bsi_buf + 1;
  1972             have_ref = mode_mv[this_mode].as_int ==
  1973                        ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
  1974             if (has_second_rf) {
  1975               have_ref  &= second_mode_mv[this_mode].as_int ==
  1976                            ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
  1977             }
  1978           }
  1980           if (!subpelmv && have_ref &&
  1981               ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
  1982             vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
  1983                        sizeof(SEG_RDSTAT));
  1984             if (num_4x4_blocks_wide > 1)
  1985               bsi->rdstat[i + 1][mode_idx].eobs =
  1986                   ref_bsi->rdstat[i + 1][mode_idx].eobs;
  1987             if (num_4x4_blocks_high > 1)
  1988               bsi->rdstat[i + 2][mode_idx].eobs =
  1989                   ref_bsi->rdstat[i + 2][mode_idx].eobs;
  1991             if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
  1992               mode_selected = this_mode;
  1993               best_rd = bsi->rdstat[i][mode_idx].brdcost;
  1994             }
  1995             continue;
  1996           }
  1999         bsi->rdstat[i][mode_idx].brdcost =
  2000             encode_inter_mb_segment(cpi, x,
  2001                                     bsi->segment_rd - this_segment_rd, i,
  2002                                     &bsi->rdstat[i][mode_idx].byrate,
  2003                                     &bsi->rdstat[i][mode_idx].bdist,
  2004                                     &bsi->rdstat[i][mode_idx].bsse,
  2005                                     bsi->rdstat[i][mode_idx].ta,
  2006                                     bsi->rdstat[i][mode_idx].tl);
  2007         if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
  2008           bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
  2009                                             bsi->rdstat[i][mode_idx].brate, 0);
  2010           bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
  2011           bsi->rdstat[i][mode_idx].eobs = pd->eobs[i];
  2012           if (num_4x4_blocks_wide > 1)
  2013             bsi->rdstat[i + 1][mode_idx].eobs = pd->eobs[i + 1];
  2014           if (num_4x4_blocks_high > 1)
  2015             bsi->rdstat[i + 2][mode_idx].eobs = pd->eobs[i + 2];
  2016         }
  2018         if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
  2019           mode_selected = this_mode;
  2020           best_rd = bsi->rdstat[i][mode_idx].brdcost;
  2021         }
  2022       } /*for each 4x4 mode*/
  2024       if (best_rd == INT64_MAX) {
  2025         int iy, midx;
  2026         for (iy = i + 1; iy < 4; ++iy)
  2027           for (midx = 0; midx < INTER_MODES; ++midx)
  2028             bsi->rdstat[iy][midx].brdcost = INT64_MAX;
  2029         bsi->segment_rd = INT64_MAX;
  2030         return;
  2031       }
  2033       mode_idx = INTER_OFFSET(mode_selected);
  2034       vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
  2035       vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
  2037       labels2mode(x, i, mode_selected, &mode_mv[mode_selected],
  2038                   &second_mode_mv[mode_selected], frame_mv, seg_mvs[i],
  2039                   bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
  2040                   x->mvcost, cpi);
  2042       br += bsi->rdstat[i][mode_idx].brate;
  2043       bd += bsi->rdstat[i][mode_idx].bdist;
  2044       block_sse += bsi->rdstat[i][mode_idx].bsse;
  2045       segmentyrate += bsi->rdstat[i][mode_idx].byrate;
  2046       this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
  2048       if (this_segment_rd > bsi->segment_rd) {
  2049         int iy, midx;
  2050         for (iy = i + 1; iy < 4; ++iy)
  2051           for (midx = 0; midx < INTER_MODES; ++midx)
  2052             bsi->rdstat[iy][midx].brdcost = INT64_MAX;
  2053         bsi->segment_rd = INT64_MAX;
  2054         return;
  2055       }
  2056     }
  2057   } /* for each label */
  2059   bsi->r = br;
  2060   bsi->d = bd;
  2061   bsi->segment_yrate = segmentyrate;
  2062   bsi->segment_rd = this_segment_rd;
  2063   bsi->sse = block_sse;
  2065   // update the coding decisions
  2066   for (i = 0; i < 4; ++i)
  2067     bsi->modes[i] = mi->bmi[i].as_mode;
  2068 }
  2070 static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
  2071                                            const TileInfo *const tile,
  2072                                            int_mv *best_ref_mv,
  2073                                            int_mv *second_best_ref_mv,
  2074                                            int64_t best_rd,
  2075                                            int *returntotrate,
  2076                                            int *returnyrate,
  2077                                            int64_t *returndistortion,
  2078                                            int *skippable, int64_t *psse,
  2079                                            int mvthresh,
  2080                                            int_mv seg_mvs[4][MAX_REF_FRAMES],
  2081                                            BEST_SEG_INFO *bsi_buf,
  2082                                            int filter_idx,
  2083                                            int mi_row, int mi_col) {
  2084   int i;
  2085   BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
  2086   MACROBLOCKD *xd = &x->e_mbd;
  2087   MODE_INFO *mi = xd->mi_8x8[0];
  2088   MB_MODE_INFO *mbmi = &mi->mbmi;
  2089   int mode_idx;
  2091   vp9_zero(*bsi);
  2093   bsi->segment_rd = best_rd;
  2094   bsi->ref_mv = best_ref_mv;
  2095   bsi->second_ref_mv = second_best_ref_mv;
  2096   bsi->mvp.as_int = best_ref_mv->as_int;
  2097   bsi->mvthresh = mvthresh;
  2099   for (i = 0; i < 4; i++)
  2100     bsi->modes[i] = ZEROMV;
  2102   rd_check_segment_txsize(cpi, x, tile, bsi_buf, filter_idx, seg_mvs,
  2103                           mi_row, mi_col);
  2105   if (bsi->segment_rd > best_rd)
  2106     return INT64_MAX;
  2107   /* set it to the best */
  2108   for (i = 0; i < 4; i++) {
  2109     mode_idx = INTER_OFFSET(bsi->modes[i]);
  2110     mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
  2111     if (has_second_ref(mbmi))
  2112       mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
  2113     xd->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
  2114     mi->bmi[i].as_mode = bsi->modes[i];
  2115   }
  2117   /*
  2118    * Copy the best segmentation's results to the output parameters.
  2119    */
  2120   *returntotrate = bsi->r;
  2121   *returndistortion = bsi->d;
  2122   *returnyrate = bsi->segment_yrate;
  2123   *skippable = vp9_is_skippable_in_plane(&x->e_mbd, BLOCK_8X8, 0);
  2124   *psse = bsi->sse;
  2125   mbmi->mode = bsi->modes[3];
  2127   return bsi->segment_rd;
  2128 }
  2130 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
  2131                     uint8_t *ref_y_buffer, int ref_y_stride,
  2132                     int ref_frame, BLOCK_SIZE block_size ) {
  2133   MACROBLOCKD *xd = &x->e_mbd;
  2134   MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
  2135   int_mv this_mv;
  2136   int i;
  2137   int zero_seen = 0;
  2138   int best_index = 0;
  2139   int best_sad = INT_MAX;
  2140   int this_sad = INT_MAX;
  2141   unsigned int max_mv = 0;
  2143   uint8_t *src_y_ptr = x->plane[0].src.buf;
  2144   uint8_t *ref_y_ptr;
  2145   int row_offset, col_offset;
  2146   int num_mv_refs = MAX_MV_REF_CANDIDATES +
  2147                     (cpi->sf.adaptive_motion_search &&
  2148                      cpi->common.show_frame &&
  2149                      block_size < cpi->sf.max_partition_size);
  2151   // Get the sad for each candidate reference mv
  2152   for (i = 0; i < num_mv_refs; i++) {
  2153     this_mv.as_int = (i < MAX_MV_REF_CANDIDATES) ?
  2154         mbmi->ref_mvs[ref_frame][i].as_int : x->pred_mv[ref_frame].as_int;
  2156     max_mv = MAX(max_mv,
  2157                  MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
  2158     // The list is at an end if we see 0 for a second time.
  2159     if (!this_mv.as_int && zero_seen)
  2160       break;
  2161     zero_seen = zero_seen || !this_mv.as_int;
  2163     row_offset = this_mv.as_mv.row >> 3;
  2164     col_offset = this_mv.as_mv.col >> 3;
  2165     ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
  2167     // Find sad for current vector.
  2168     this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
  2169                                            ref_y_ptr, ref_y_stride,
  2170                                            0x7fffffff);
  2172     // Note if it is the best so far.
  2173     if (this_sad < best_sad) {
  2174       best_sad = this_sad;
  2175       best_index = i;
  2176     }
  2177   }
  2179   // Note the index of the mv that worked best in the reference list.
  2180   x->mv_best_ref_index[ref_frame] = best_index;
  2181   x->max_mv_context[ref_frame] = max_mv;
  2182 }
  2184 static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
  2185                                      unsigned int *ref_costs_single,
  2186                                      unsigned int *ref_costs_comp,
  2187                                      vp9_prob *comp_mode_p) {
  2188   VP9_COMMON *const cm = &cpi->common;
  2189   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
  2190   int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
  2191                                              SEG_LVL_REF_FRAME);
  2192   if (seg_ref_active) {
  2193     vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
  2194     vpx_memset(ref_costs_comp,   0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
  2195     *comp_mode_p = 128;
  2196   } else {
  2197     vp9_prob intra_inter_p = vp9_get_pred_prob_intra_inter(cm, xd);
  2198     vp9_prob comp_inter_p = 128;
  2200     if (cm->comp_pred_mode == HYBRID_PREDICTION) {
  2201       comp_inter_p = vp9_get_pred_prob_comp_inter_inter(cm, xd);
  2202       *comp_mode_p = comp_inter_p;
  2203     } else {
  2204       *comp_mode_p = 128;
  2205     }
  2207     ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
  2209     if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) {
  2210       vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
  2211       vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
  2212       unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
  2214       if (cm->comp_pred_mode == HYBRID_PREDICTION)
  2215         base_cost += vp9_cost_bit(comp_inter_p, 0);
  2217       ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
  2218           ref_costs_single[ALTREF_FRAME] = base_cost;
  2219       ref_costs_single[LAST_FRAME]   += vp9_cost_bit(ref_single_p1, 0);
  2220       ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
  2221       ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
  2222       ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
  2223       ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
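
             // The single-reference costs follow a two-bit tree: the first bit
             // (ref_single_p1) separates LAST from {GOLDEN, ALTREF}, and the
             // second bit (ref_single_p2) separates GOLDEN from ALTREF, which
             // is why GOLDEN and ALTREF accumulate two bit costs each above.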
  2224     } else {
  2225       ref_costs_single[LAST_FRAME]   = 512;
  2226       ref_costs_single[GOLDEN_FRAME] = 512;
  2227       ref_costs_single[ALTREF_FRAME] = 512;
  2228     }
  2229     if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) {
  2230       vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
  2231       unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
  2233       if (cm->comp_pred_mode == HYBRID_PREDICTION)
  2234         base_cost += vp9_cost_bit(comp_inter_p, 1);
  2236       ref_costs_comp[LAST_FRAME]   = base_cost + vp9_cost_bit(ref_comp_p, 0);
  2237       ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
  2238     } else {
  2239       ref_costs_comp[LAST_FRAME]   = 512;
  2240       ref_costs_comp[GOLDEN_FRAME] = 512;
  2241     }
  2242   }
  2243 }
  2245 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
  2246                          int mode_index,
  2247                          int_mv *ref_mv,
  2248                          int_mv *second_ref_mv,
  2249                          int64_t comp_pred_diff[NB_PREDICTION_TYPES],
  2250                          int64_t tx_size_diff[TX_MODES],
  2251                          int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) {
  2252   MACROBLOCKD *const xd = &x->e_mbd;
  2254   // Take a snapshot of the coding context so it can be
  2255   // restored if we decide to encode this way
  2256   ctx->skip = x->skip;
  2257   ctx->best_mode_index = mode_index;
  2258   ctx->mic = *xd->mi_8x8[0];
  2260   ctx->best_ref_mv.as_int = ref_mv->as_int;
  2261   ctx->second_best_ref_mv.as_int = second_ref_mv->as_int;
  2263   ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY];
  2264   ctx->comp_pred_diff   = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
  2265   ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];
  2267   vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
  2268   vpx_memcpy(ctx->best_filter_diff, best_filter_diff,
  2269              sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
  2270 }
  2272 static void setup_pred_block(const MACROBLOCKD *xd,
  2273                              struct buf_2d dst[MAX_MB_PLANE],
  2274                              const YV12_BUFFER_CONFIG *src,
  2275                              int mi_row, int mi_col,
  2276                              const struct scale_factors *scale,
  2277                              const struct scale_factors *scale_uv) {
  2278   int i;
  2280   dst[0].buf = src->y_buffer;
  2281   dst[0].stride = src->y_stride;
  2282   dst[1].buf = src->u_buffer;
  2283   dst[2].buf = src->v_buffer;
  2284   dst[1].stride = dst[2].stride = src->uv_stride;
  2285 #if CONFIG_ALPHA
  2286   dst[3].buf = src->alpha_buffer;
  2287   dst[3].stride = src->alpha_stride;
  2288 #endif
  2290   // TODO(jkoleszar): Make scale factors per-plane data
  2291   for (i = 0; i < MAX_MB_PLANE; i++) {
  2292     setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
  2293                      i ? scale_uv : scale,
  2294                      xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
  2295   }
  2296 }
  2298 static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
  2299                                const TileInfo *const tile,
  2300                                int idx, MV_REFERENCE_FRAME frame_type,
  2301                                BLOCK_SIZE block_size,
  2302                                int mi_row, int mi_col,
  2303                                int_mv frame_nearest_mv[MAX_REF_FRAMES],
  2304                                int_mv frame_near_mv[MAX_REF_FRAMES],
  2305                                struct buf_2d yv12_mb[4][MAX_MB_PLANE],
  2306                                struct scale_factors scale[MAX_REF_FRAMES]) {
  2307   VP9_COMMON *cm = &cpi->common;
  2308   YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]];
  2309   MACROBLOCKD *const xd = &x->e_mbd;
  2310   MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  2312   // set up scaling factors
  2313   scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1];
  2315   scale[frame_type].sfc->set_scaled_offsets(&scale[frame_type],
  2316                                             mi_row * MI_SIZE, mi_col * MI_SIZE);
  2318   // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
  2319   // use the UV scaling factors.
  2320   setup_pred_block(xd, yv12_mb[frame_type], yv12, mi_row, mi_col,
  2321                    &scale[frame_type], &scale[frame_type]);
  2323   // Gets an initial list of candidate vectors from neighbours and orders them
  2324   vp9_find_mv_refs(cm, xd, tile, xd->mi_8x8[0],
  2325                    xd->last_mi,
  2326                    frame_type,
  2327                    mbmi->ref_mvs[frame_type], mi_row, mi_col);
  2329   // Candidate refinement carried out at encoder and decoder
  2330   vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv,
  2331                         mbmi->ref_mvs[frame_type],
  2332                         &frame_nearest_mv[frame_type],
  2333                         &frame_near_mv[frame_type]);
  2335   // Further refinement that is encode side only to test the top few candidates
  2336   // in full and choose the best as the centre point for subsequent searches.
  2337   // The current implementation doesn't support scaling.
  2338   if (!vp9_is_scaled(scale[frame_type].sfc) && block_size >= BLOCK_8X8)
  2339     mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride,
  2340             frame_type, block_size);
  2341 }
  2343 static YV12_BUFFER_CONFIG *get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) {
  2344   YV12_BUFFER_CONFIG *scaled_ref_frame = NULL;
  2345   int fb = get_ref_frame_idx(cpi, ref_frame);
  2346   int fb_scale = get_scale_ref_frame_idx(cpi, ref_frame);
  2347   if (cpi->scaled_ref_idx[fb_scale] != cpi->common.ref_frame_map[fb])
  2348     scaled_ref_frame = &cpi->common.yv12_fb[cpi->scaled_ref_idx[fb_scale]];
  2349   return scaled_ref_frame;
  2350 }
  2352 static INLINE int get_switchable_rate(const MACROBLOCK *x) {
  2353   const MACROBLOCKD *const xd = &x->e_mbd;
  2354   const MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  2355   const int ctx = vp9_get_pred_context_switchable_interp(xd);
  2356   return SWITCHABLE_INTERP_RATE_FACTOR *
  2357              x->switchable_interp_costs[ctx][mbmi->interp_filter];
  2358 }
  2360 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
  2361                                  const TileInfo *const tile,
  2362                                  BLOCK_SIZE bsize,
  2363                                  int mi_row, int mi_col,
  2364                                  int_mv *tmp_mv, int *rate_mv) {
  2365   MACROBLOCKD *xd = &x->e_mbd;
  2366   VP9_COMMON *cm = &cpi->common;
  2367   MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
  2368   struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
  2369   int bestsme = INT_MAX;
  2370   int further_steps, step_param;
  2371   int sadpb = x->sadperbit16;
  2372   int_mv mvp_full;
  2373   int ref = mbmi->ref_frame[0];
  2374   int_mv ref_mv = mbmi->ref_mvs[ref][0];
  2375   const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
  2377   int tmp_col_min = x->mv_col_min;
  2378   int tmp_col_max = x->mv_col_max;
  2379   int tmp_row_min = x->mv_row_min;
  2380   int tmp_row_max = x->mv_row_max;
  2382   YV12_BUFFER_CONFIG *scaled_ref_frame = get_scaled_ref_frame(cpi, ref);
  2384   if (scaled_ref_frame) {
  2385     int i;
  2386     // Swap out the reference frame for a version that's been scaled to
  2387     // match the resolution of the current frame, allowing the existing
  2388     // motion search code to be used without additional modifications.
  2389     for (i = 0; i < MAX_MB_PLANE; i++)
  2390       backup_yv12[i] = xd->plane[i].pre[0];
  2392     setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  2393   }
  2395   vp9_clamp_mv_min_max(x, &ref_mv.as_mv);
  2397   // Adjust search parameters based on small partitions' result.
  2398   if (x->fast_ms) {
  2399     // && abs(mvp_full.as_mv.row - x->pred_mv.as_mv.row) < 24 &&
  2400     // abs(mvp_full.as_mv.col - x->pred_mv.as_mv.col) < 24) {
  2401     // adjust search range
  2402     step_param = 6;
  2403     if (x->fast_ms > 1)
  2404       step_param = 8;
  2406     // Get prediction MV.
  2407     mvp_full.as_int = x->pred_mv[ref].as_int;
  2409     // Adjust MV sign if needed.
  2410     if (cm->ref_frame_sign_bias[ref]) {
  2411       mvp_full.as_mv.col *= -1;
  2412       mvp_full.as_mv.row *= -1;
  2413     }
  2414   } else {
  2415     // Work out the size of the first step in the mv step search.
  2416     // 0 here is maximum length first step. 1 is MAX >> 1 etc.
  2417     if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
  2418       // Take a weighted average of the step_params based on the last
  2419       // frame's max mv magnitude and that based on the best ref mvs of the
  2420       // current block for the given reference.
  2421       step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) +
  2422                     cpi->mv_step_param) >> 1;
  2423     } else {
  2424       step_param = cpi->mv_step_param;
  2425     }
  2426   }
  2428   if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 &&
  2429       cpi->common.show_frame) {
  2430     int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
  2431                                                        b_width_log2(bsize)));
  2432     step_param = MAX(step_param, boffset);
  2433   }
  2435   mvp_full.as_int = x->mv_best_ref_index[ref] < MAX_MV_REF_CANDIDATES ?
  2436       mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int :
  2437       x->pred_mv[ref].as_int;
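
         // The mv predictor is in 1/8-pel units; convert it to full-pel for
         // the integer-pel search stages below.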
  2439   mvp_full.as_mv.col >>= 3;
  2440   mvp_full.as_mv.row >>= 3;
  2442   // Further step/diamond searches as necessary
  2443   further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
  2445   if (cpi->sf.search_method == HEX) {
  2446     bestsme = vp9_hex_search(x, &mvp_full.as_mv,
  2447                              step_param,
  2448                              sadpb, 1,
  2449                              &cpi->fn_ptr[block_size], 1,
  2450                              &ref_mv.as_mv, &tmp_mv->as_mv);
  2451   } else if (cpi->sf.search_method == SQUARE) {
  2452     bestsme = vp9_square_search(x, &mvp_full.as_mv,
  2453                                 step_param,
  2454                                 sadpb, 1,
  2455                                 &cpi->fn_ptr[block_size], 1,
  2456                                 &ref_mv.as_mv, &tmp_mv->as_mv);
  2457   } else if (cpi->sf.search_method == BIGDIA) {
  2458     bestsme = vp9_bigdia_search(x, &mvp_full.as_mv,
  2459                                 step_param,
  2460                                 sadpb, 1,
  2461                                 &cpi->fn_ptr[block_size], 1,
  2462                                 &ref_mv.as_mv, &tmp_mv->as_mv);
  2463   } else {
  2464     bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
  2465                                      sadpb, further_steps, 1,
  2466                                      &cpi->fn_ptr[block_size],
  2467                                      &ref_mv, tmp_mv);
  2468   }
  2470   x->mv_col_min = tmp_col_min;
  2471   x->mv_col_max = tmp_col_max;
  2472   x->mv_row_min = tmp_row_min;
  2473   x->mv_row_max = tmp_row_max;
  2475   if (bestsme < INT_MAX) {
  2476     int dis;  /* TODO: use dis in distortion calculation later. */
  2477     unsigned int sse;
  2478     cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv.as_mv,
  2479                                  cm->allow_high_precision_mv,
  2480                                  x->errorperbit,
  2481                                  &cpi->fn_ptr[block_size],
  2482                                  0, cpi->sf.subpel_iters_per_step,
  2483                                  x->nmvjointcost, x->mvcost,
  2484                                  &dis, &sse);
  2485   }
  2486   *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv.as_mv,
  2487                              x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  2489   if (cpi->sf.adaptive_motion_search && cpi->common.show_frame)
  2490     x->pred_mv[ref].as_int = tmp_mv->as_int;
  2492   if (scaled_ref_frame) {
  2493     int i;
  2494     for (i = 0; i < MAX_MB_PLANE; i++)
  2495       xd->plane[i].pre[0] = backup_yv12[i];
  2496   }
  2497 }
  2499 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
  2500                                 BLOCK_SIZE bsize,
  2501                                 int_mv *frame_mv,
  2502                                 int mi_row, int mi_col,
  2503                                 int_mv single_newmv[MAX_REF_FRAMES],
  2504                                 int *rate_mv) {
  2505   int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize);
  2506   MACROBLOCKD *xd = &x->e_mbd;
  2507   MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
  2508   const int refs[2] = { mbmi->ref_frame[0],
  2509                         mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  2510   int_mv ref_mv[2];
  2511   const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
  2512   int ite, ref;
  2513   // Prediction buffer from second frame.
  2514   uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
  2516   // Do joint motion search in compound mode to get more accurate mv.
  2517   struct buf_2d backup_yv12[2][MAX_MB_PLANE];
  2518   struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0];
  2519   int last_besterr[2] = {INT_MAX, INT_MAX};
  2520   YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
  2521     get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
  2522     get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
  2523   };
  2525   for (ref = 0; ref < 2; ++ref) {
  2526     ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];
  2528     if (scaled_ref_frame[ref]) {
  2529       int i;
  2530       // Swap out the reference frame for a version that's been scaled to
  2531       // match the resolution of the current frame, allowing the existing
  2532       // motion search code to be used without additional modifications.
  2533       for (i = 0; i < MAX_MB_PLANE; i++)
  2534         backup_yv12[ref][i] = xd->plane[i].pre[ref];
  2535       setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, NULL);
  2536     }
  2538     xd->scale_factor[ref].sfc->set_scaled_offsets(&xd->scale_factor[ref],
  2539                                                   mi_row, mi_col);
  2540     frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
  2541   }
  2543   // Allow the joint search to run multiple times iteratively for each ref
  2544   // frame, and break out of the search loop if it can't find a better mv.
  2545   for (ite = 0; ite < 4; ite++) {
  2546     struct buf_2d ref_yv12[2];
  2547     int bestsme = INT_MAX;
  2548     int sadpb = x->sadperbit16;
  2549     int_mv tmp_mv;
  2550     int search_range = 3;
  2552     int tmp_col_min = x->mv_col_min;
  2553     int tmp_col_max = x->mv_col_max;
  2554     int tmp_row_min = x->mv_row_min;
  2555     int tmp_row_max = x->mv_row_max;
  2556     int id = ite % 2;
  2558     // Initialized here because of compiler problem in Visual Studio.
  2559     ref_yv12[0] = xd->plane[0].pre[0];
  2560     ref_yv12[1] = xd->plane[0].pre[1];
  2562     // Get pred block from second frame.
  2563     vp9_build_inter_predictor(ref_yv12[!id].buf,
  2564                               ref_yv12[!id].stride,
  2565                               second_pred, pw,
  2566                               &frame_mv[refs[!id]].as_mv,
  2567                               &xd->scale_factor[!id],
  2568                               pw, ph, 0,
  2569                               &xd->subpix, MV_PRECISION_Q3);
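
           // In compound prediction the final predictor is the average of the
           // two single-reference predictions. Holding the other reference's
           // prediction fixed in second_pred lets the search below optimize
           // the current reference's mv against that average.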
  2571     // Compound motion search on first ref frame.
  2572     if (id)
  2573       xd->plane[0].pre[0] = ref_yv12[id];
  2574     vp9_clamp_mv_min_max(x, &ref_mv[id].as_mv);
  2576     // Use mv result from single mode as mvp.
  2577     tmp_mv.as_int = frame_mv[refs[id]].as_int;
  2579     tmp_mv.as_mv.col >>= 3;
  2580     tmp_mv.as_mv.row >>= 3;
  2582     // Small-range full-pixel motion search
  2583     bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
  2584                                        search_range,
  2585                                        &cpi->fn_ptr[block_size],
  2586                                        x->nmvjointcost, x->mvcost,
  2587                                        &ref_mv[id], second_pred,
  2588                                        pw, ph);
  2590     x->mv_col_min = tmp_col_min;
  2591     x->mv_col_max = tmp_col_max;
  2592     x->mv_row_min = tmp_row_min;
  2593     x->mv_row_max = tmp_row_max;
  2595     if (bestsme < INT_MAX) {
  2596       int dis; /* TODO: use dis in distortion calculation later. */
  2597       unsigned int sse;
  2599       bestsme = cpi->find_fractional_mv_step_comp(
  2600           x, &tmp_mv.as_mv,
  2601           &ref_mv[id].as_mv,
  2602           cpi->common.allow_high_precision_mv,
  2603           x->errorperbit,
  2604           &cpi->fn_ptr[block_size],
  2605           0, cpi->sf.subpel_iters_per_step,
  2606           x->nmvjointcost, x->mvcost,
  2607           &dis, &sse, second_pred,
  2608           pw, ph);
  2609     }
  2611     if (id)
  2612       xd->plane[0].pre[0] = scaled_first_yv12;
  2614     if (bestsme < last_besterr[id]) {
  2615       frame_mv[refs[id]].as_int = tmp_mv.as_int;
  2616       last_besterr[id] = bestsme;
  2617     } else {
  2618       break;
  2619     }
  2620   }
  2622   *rate_mv = 0;
  2624   for (ref = 0; ref < 2; ++ref) {
  2625     if (scaled_ref_frame[ref]) {
  2626       // restore the predictor
  2627       int i;
  2628       for (i = 0; i < MAX_MB_PLANE; i++)
  2629         xd->plane[i].pre[ref] = backup_yv12[ref][i];
  2630     }
  2632     *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
  2633                                 &mbmi->ref_mvs[refs[ref]][0].as_mv,
  2634                                 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  2635   }
  2637   vpx_free(second_pred);
  2638 }
  2640 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
  2641                                  const TileInfo *const tile,
  2642                                  BLOCK_SIZE bsize,
  2643                                  int64_t txfm_cache[],
  2644                                  int *rate2, int64_t *distortion,
  2645                                  int *skippable,
  2646                                  int *rate_y, int64_t *distortion_y,
  2647                                  int *rate_uv, int64_t *distortion_uv,
  2648                                  int *mode_excluded, int *disable_skip,
  2649                                  INTERPOLATION_TYPE *best_filter,
  2650                                  int_mv (*mode_mv)[MAX_REF_FRAMES],
  2651                                  int mi_row, int mi_col,
  2652                                  int_mv single_newmv[MAX_REF_FRAMES],
  2653                                  int64_t *psse,
  2654                                  const int64_t ref_best_rd) {
  2655   VP9_COMMON *cm = &cpi->common;
  2656   MACROBLOCKD *xd = &x->e_mbd;
  2657   MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
  2658   const int is_comp_pred = has_second_ref(mbmi);
  2659   const int num_refs = is_comp_pred ? 2 : 1;
  2660   const int this_mode = mbmi->mode;
  2661   int_mv *frame_mv = mode_mv[this_mode];
  2662   int i;
  2663   int refs[2] = { mbmi->ref_frame[0],
  2664     (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
  2665   int_mv cur_mv[2];
  2666   int64_t this_rd = 0;
  2667   DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
  2668   int pred_exists = 0;
  2669   int intpel_mv;
  2670   int64_t rd, best_rd = INT64_MAX;
  2671   int best_needs_copy = 0;
  2672   uint8_t *orig_dst[MAX_MB_PLANE];
  2673   int orig_dst_stride[MAX_MB_PLANE];
  2674   int rs = 0;
  2676   if (is_comp_pred) {
  2677     if (frame_mv[refs[0]].as_int == INVALID_MV ||
  2678         frame_mv[refs[1]].as_int == INVALID_MV)
  2679       return INT64_MAX;
  2680   }
  2682   if (this_mode == NEWMV) {
  2683     int rate_mv;
  2684     if (is_comp_pred) {
  2685       // Initialize mv using single prediction mode result.
  2686       frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
  2687       frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
  2689       if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
  2690         joint_motion_search(cpi, x, bsize, frame_mv,
  2691                             mi_row, mi_col, single_newmv, &rate_mv);
  2692       } else {
  2693         rate_mv  = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
  2694                                    &mbmi->ref_mvs[refs[0]][0].as_mv,
  2695                                    x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  2696         rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
  2697                                    &mbmi->ref_mvs[refs[1]][0].as_mv,
  2698                                    x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  2699       }
  2700       *rate2 += rate_mv;
  2701     } else {
  2702       int_mv tmp_mv;
  2703       single_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
  2704                            &tmp_mv, &rate_mv);
  2705       *rate2 += rate_mv;
  2706       frame_mv[refs[0]].as_int =
  2707           xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
  2708       single_newmv[refs[0]].as_int = tmp_mv.as_int;
  2709     }
  2710   }
  2712   // If we're in NEARMV/NEARESTMV/ZEROMV mode and the MV is (0,0), compare to ZEROMV.
  2713   if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
  2714       frame_mv[refs[0]].as_int == 0 &&
  2715       !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
  2716       (num_refs == 1 || frame_mv[refs[1]].as_int == 0)) {
  2717     int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
  2718     int c1 = cost_mv_ref(cpi, NEARMV, rfc);
  2719     int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
  2720     int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
  2722     if (this_mode == NEARMV) {
  2723       if (c1 > c3)
  2724         return INT64_MAX;
  2725     } else if (this_mode == NEARESTMV) {
  2726       if (c2 > c3)
  2727         return INT64_MAX;
  2728     } else {
  2729       assert(this_mode == ZEROMV);
  2730       if (num_refs == 1) {
  2731         if ((c3 >= c2 &&
  2732              mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
  2733             (c3 >= c1 &&
  2734              mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
  2735           return INT64_MAX;
  2736       } else {
  2737         if ((c3 >= c2 &&
  2738              mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
  2739              mode_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
  2740             (c3 >= c1 &&
  2741              mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
  2742              mode_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
  2743           return INT64_MAX;
  2744       }
  2745     }
  2746   }
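         /* Example of the pruning above: if signalling NEARMV costs more bits
          * than ZEROMV (c1 > c3) while NEARMV's vector is (0,0), it produces
          * the same prediction as ZEROMV at a higher rate, so it can never
          * win and is dropped without a full RD evaluation. */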
  2748   for (i = 0; i < num_refs; ++i) {
  2749     cur_mv[i] = frame_mv[refs[i]];
  2750     // Clip "next_nearest" so that it does not extend too far out of the image.
  2751     if (this_mode != NEWMV)
  2752       clamp_mv2(&cur_mv[i].as_mv, xd);
  2754     if (mv_check_bounds(x, &cur_mv[i]))
  2755       return INT64_MAX;
  2756     mbmi->mv[i].as_int = cur_mv[i].as_int;
  2757   }
  2759   // Do the first prediction into the destination buffer. Do the next
  2760   // prediction into a temporary buffer. Then keep track of which one
  2761   // of these currently holds the best predictor, and use the other
  2762   // one for future predictions. In the end, copy from tmp_buf to
  2763   // dst if necessary.
  2764   for (i = 0; i < MAX_MB_PLANE; i++) {
  2765     orig_dst[i] = xd->plane[i].dst.buf;
  2766     orig_dst_stride[i] = xd->plane[i].dst.stride;
  2767   }
  2769   /* We don't include the cost of the second reference here, because there
  2770    * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
  2771    * words if you present them in that order, the second one is always known
  2772    * if the first is known */
  2773   *rate2 += cost_mv_ref(cpi, this_mode,
  2774                         mbmi->mode_context[mbmi->ref_frame[0]]);
  2776   if (!(*mode_excluded)) {
  2777     if (is_comp_pred) {
  2778       *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
  2779     } else {
  2780       *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
  2781     }
  2782   }
  2784   pred_exists = 0;
  2785   // Are all MVs integer pel for Y and UV
  2786   intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 &&
  2787       (mbmi->mv[0].as_mv.col & 15) == 0;
  2788   if (is_comp_pred)
  2789     intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 &&
  2790         (mbmi->mv[1].as_mv.col & 15) == 0;
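         /* MV components are in 1/8-pel units, so (v & 7) == 0 would already
          * mean integer pel for Y; chroma is subsampled 2x, making its
          * effective vector v / 2, so (v & 15) == 0 is what guarantees
          * integer pel for the UV planes as well. */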
  2791   // Search for best switchable filter by checking the variance of
  2792   // pred error irrespective of whether the filter will be used
  2793   if (cm->mcomp_filter_type != BILINEAR) {
  2794     *best_filter = EIGHTTAP;
  2795     if (x->source_variance <
  2796         cpi->sf.disable_filter_search_var_thresh) {
  2797       *best_filter = EIGHTTAP;
  2798       vp9_zero(cpi->rd_filter_cache);
  2799     } else {
  2800       int i, newbest;
  2801       int tmp_rate_sum = 0;
  2802       int64_t tmp_dist_sum = 0;
  2804       cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX;
  2805       for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
  2806         int j;
  2807         int64_t rs_rd;
  2808         mbmi->interp_filter = i;
  2809         vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
  2810         rs = get_switchable_rate(x);
  2811         rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
  2813         if (i > 0 && intpel_mv) {
  2814           cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
  2815                                            tmp_rate_sum, tmp_dist_sum);
  2816           cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
  2817               MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
  2818                   cpi->rd_filter_cache[i] + rs_rd);
  2819           rd = cpi->rd_filter_cache[i];
  2820           if (cm->mcomp_filter_type == SWITCHABLE)
  2821             rd += rs_rd;
  2822         } else {
  2823           int rate_sum = 0;
  2824           int64_t dist_sum = 0;
  2825           if ((cm->mcomp_filter_type == SWITCHABLE &&
  2826                (!i || best_needs_copy)) ||
  2827               (cm->mcomp_filter_type != SWITCHABLE &&
  2828                (cm->mcomp_filter_type == mbmi->interp_filter ||
  2829                 (i == 0 && intpel_mv)))) {
  2830             for (j = 0; j < MAX_MB_PLANE; j++) {
  2831               xd->plane[j].dst.buf = orig_dst[j];
  2832               xd->plane[j].dst.stride = orig_dst_stride[j];
  2833             }
  2834           } else {
  2835             for (j = 0; j < MAX_MB_PLANE; j++) {
  2836               xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
  2837               xd->plane[j].dst.stride = 64;
  2838             }
  2840           vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
  2841           model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
  2842           cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
  2843                                            rate_sum, dist_sum);
  2844           cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
  2845               MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
  2846                   cpi->rd_filter_cache[i] + rs_rd);
  2847           rd = cpi->rd_filter_cache[i];
  2848           if (cm->mcomp_filter_type == SWITCHABLE)
  2849             rd += rs_rd;
  2850           if (i == 0 && intpel_mv) {
  2851             tmp_rate_sum = rate_sum;
  2852             tmp_dist_sum = dist_sum;
  2853           }
  2854         }
  2855         if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
  2856           if (rd / 2 > ref_best_rd) {
  2857             for (i = 0; i < MAX_MB_PLANE; i++) {
  2858               xd->plane[i].dst.buf = orig_dst[i];
  2859               xd->plane[i].dst.stride = orig_dst_stride[i];
  2860             }
  2861             return INT64_MAX;
  2862           }
  2863         }
  2864         newbest = i == 0 || rd < best_rd;
  2866         if (newbest) {
  2867           best_rd = rd;
  2868           *best_filter = mbmi->interp_filter;
  2869           if (cm->mcomp_filter_type == SWITCHABLE && i && !intpel_mv)
  2870             best_needs_copy = !best_needs_copy;
  2871         }
  2873         if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
  2874             (cm->mcomp_filter_type != SWITCHABLE &&
  2875              cm->mcomp_filter_type == mbmi->interp_filter)) {
  2876           pred_exists = 1;
  2877         }
  2878       }
  2880       for (i = 0; i < MAX_MB_PLANE; i++) {
  2881         xd->plane[i].dst.buf = orig_dst[i];
  2882         xd->plane[i].dst.stride = orig_dst_stride[i];
  2883       }
  2884     }
  2885   }
  2886   // Set the appropriate filter
  2887   mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
  2888       cm->mcomp_filter_type : *best_filter;
  2889   vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
  2890   rs = cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(x) : 0;
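         /* The filter decisions above and the breakout tests below all compare
          * candidates through one scalar: rd = RDCOST(x->rdmult, x->rddiv,
          * rate, dist), i.e. a Lagrangian lambda * rate + distortion. A minimal
          * sketch of that tradeoff, assuming the usual Q8 fixed-point form of
          * the macro in vp9_rdopt.h:
          *
          *   int64_t rd = ((128 + (int64_t)rate * rdmult) >> 8)  // lambda*rate
          *              + (dist << rddiv);                       // distortion
          *
          * so doubling x->rdmult makes every spent bit twice as expensive and
          * biases all of these decisions toward lower-rate choices. */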
  2892   if (pred_exists) {
  2893     if (best_needs_copy) {
  2894       // Again, temporarily set the buffers to local memory to prevent a memcpy.
  2895       for (i = 0; i < MAX_MB_PLANE; i++) {
  2896         xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
  2897         xd->plane[i].dst.stride = 64;
  2898       }
  2899     }
  2900   } else {
  2901     // Handles the special case when a filter that is not in the
  2902     // switchable list (e.g. bilinear, 6-tap) is indicated at the frame level
  2903     vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
  2904   }
  2907   if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
  2908     int tmp_rate;
  2909     int64_t tmp_dist;
  2910     model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
  2911     rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
  2912     // If the current pred_error modeled rd is substantially more than the best
  2913     // so far, do not bother doing full rd
  2914     if (rd / 2 > ref_best_rd) {
  2915       for (i = 0; i < MAX_MB_PLANE; i++) {
  2916         xd->plane[i].dst.buf = orig_dst[i];
  2917         xd->plane[i].dst.stride = orig_dst_stride[i];
  2918       }
  2919       return INT64_MAX;
  2920     }
  2921   }
  2923   if (cpi->common.mcomp_filter_type == SWITCHABLE)
  2924     *rate2 += get_switchable_rate(x);
  2926   if (!is_comp_pred && cpi->enable_encode_breakout) {
  2927     if (cpi->active_map_enabled && x->active_ptr[0] == 0)
  2928       x->skip = 1;
  2929     else if (x->encode_breakout) {
  2930       const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
  2931       const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
  2932       unsigned int var, sse;
  2933       // Skipping threshold for ac.
  2934       unsigned int thresh_ac;
  2935       // The encode_breakout input
  2936       unsigned int encode_breakout = x->encode_breakout << 4;
  2937       unsigned int max_thresh = 36000;
  2939       // Use an extremely low threshold for static frames to limit skipping.
  2940       if (cpi->enable_encode_breakout == 2)
  2941         max_thresh = 128;
  2943       // Calculate threshold according to dequant value.
  2944       thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
  2946       // Use encode_breakout input if it is bigger than internal threshold.
  2947       if (thresh_ac < encode_breakout)
  2948         thresh_ac = encode_breakout;
  2950       // Set a maximum for threshold to avoid big PSNR loss in low bitrate case.
  2951       if (thresh_ac > max_thresh)
  2952         thresh_ac = max_thresh;
  2954       var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
  2955                                    xd->plane[0].dst.buf,
  2956                                    xd->plane[0].dst.stride, &sse);
  2958       // Adjust threshold according to partition size.
  2959       thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
  2960           b_height_log2_lookup[bsize]);
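             /* Worked example: b_width_log2/b_height_log2 count 4x4 units, so
              * a 64x64 block gives 4 + 4 and a shift of 0 while a 16x16 block
              * gives 2 + 2 and a shift of 4, i.e. the threshold scales with
              * the pixel count of the partition. */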
  2962       // Y skipping condition checking
  2963       if (sse < thresh_ac || sse == 0) {
  2964         // Skipping threshold for dc
  2965         unsigned int thresh_dc;
  2967         thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
  2969         // dc skipping checking
  2970         if ((sse - var) < thresh_dc || sse == var) {
  2971           unsigned int sse_u, sse_v;
  2972           unsigned int var_u, var_v;
  2974           var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
  2975                                           x->plane[1].src.stride,
  2976                                           xd->plane[1].dst.buf,
  2977                                           xd->plane[1].dst.stride, &sse_u);
  2979           // U skipping condition checking
  2980           if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
  2981               (sse_u - var_u < thresh_dc || sse_u == var_u)) {
  2982             var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
  2983                                             x->plane[2].src.stride,
  2984                                             xd->plane[2].dst.buf,
  2985                                             xd->plane[2].dst.stride, &sse_v);
  2987             // V skipping condition checking
  2988             if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
  2989                 (sse_v - var_v < thresh_dc || sse_v == var_v)) {
  2990               x->skip = 1;
  2992               // The cost of skip bit needs to be added.
  2993               *rate2 += vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1);
  2995               // Scaling factor for SSE from spatial domain to frequency domain
  2996               // is 16. Adjust distortion accordingly.
  2997               *distortion_uv = (sse_u + sse_v) << 4;
  2998               *distortion = (sse << 4) + *distortion_uv;
  3000               *disable_skip = 1;
  3001               this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
  3002             }
  3003           }
  3004         }
  3005       }
  3006     }
  3007   }
  3009   if (!x->skip) {
  3010     int skippable_y, skippable_uv;
  3011     int64_t sseuv = INT64_MAX;
  3012     int64_t rdcosty = INT64_MAX;
  3014     // Y cost and distortion
  3015     super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
  3016                     bsize, txfm_cache, ref_best_rd);
  3018     if (*rate_y == INT_MAX) {
  3019       *rate2 = INT_MAX;
  3020       *distortion = INT64_MAX;
  3021       for (i = 0; i < MAX_MB_PLANE; i++) {
  3022         xd->plane[i].dst.buf = orig_dst[i];
  3023         xd->plane[i].dst.stride = orig_dst_stride[i];
  3024       }
  3025       return INT64_MAX;
  3026     }
  3028     *rate2 += *rate_y;
  3029     *distortion += *distortion_y;
  3031     rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
  3032     rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
  3034     super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
  3035                      bsize, ref_best_rd - rdcosty);
  3036     if (*rate_uv == INT_MAX) {
  3037       *rate2 = INT_MAX;
  3038       *distortion = INT64_MAX;
  3039       for (i = 0; i < MAX_MB_PLANE; i++) {
  3040         xd->plane[i].dst.buf = orig_dst[i];
  3041         xd->plane[i].dst.stride = orig_dst_stride[i];
  3042       }
  3043       return INT64_MAX;
  3044     }
  3046     *psse += sseuv;
  3047     *rate2 += *rate_uv;
  3048     *distortion += *distortion_uv;
  3049     *skippable = skippable_y && skippable_uv;
  3050   }
  3052   for (i = 0; i < MAX_MB_PLANE; i++) {
  3053     xd->plane[i].dst.buf = orig_dst[i];
  3054     xd->plane[i].dst.stride = orig_dst_stride[i];
  3055   }
  3057   return this_rd;  // if 0, this will be re-calculated by caller
  3058 }
  3060 static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
  3061                            int max_plane) {
  3062   struct macroblock_plane *const p = x->plane;
  3063   struct macroblockd_plane *const pd = x->e_mbd.plane;
  3064   int i;
  3066   for (i = 0; i < max_plane; ++i) {
  3067     p[i].coeff    = ctx->coeff_pbuf[i][1];
  3068     pd[i].qcoeff  = ctx->qcoeff_pbuf[i][1];
  3069     pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
  3070     pd[i].eobs    = ctx->eobs_pbuf[i][1];
  3072     ctx->coeff_pbuf[i][1]   = ctx->coeff_pbuf[i][0];
  3073     ctx->qcoeff_pbuf[i][1]  = ctx->qcoeff_pbuf[i][0];
  3074     ctx->dqcoeff_pbuf[i][1] = ctx->dqcoeff_pbuf[i][0];
  3075     ctx->eobs_pbuf[i][1]    = ctx->eobs_pbuf[i][0];
  3077     ctx->coeff_pbuf[i][0]   = p[i].coeff;
  3078     ctx->qcoeff_pbuf[i][0]  = pd[i].qcoeff;
  3079     ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
  3080     ctx->eobs_pbuf[i][0]    = pd[i].eobs;
  3081   }
  3082 }
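       /* swap_block_ptr() accepts a candidate without copying coefficient
        * data: slots [0] and [1] of each pbuf pair are exchanged and the live
        * encoder pointers re-aimed at slot 0. The same ping-pong pattern as a
        * self-contained sketch (hypothetical names, one buffer per side):
        *
        *   static void pingpong_accept(int16_t **live, int16_t *slot[2]) {
        *     int16_t *const prev_best = slot[1];
        *     slot[1] = slot[0];   // current data becomes the saved best
        *     slot[0] = prev_best; // old best is recycled as scratch
        *     *live = slot[0];     // the encoder keeps writing via slot 0
        *   }
        */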
  3084 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
  3085                                int *returnrate, int64_t *returndist,
  3086                                BLOCK_SIZE bsize,
  3087                                PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  3088   VP9_COMMON *const cm = &cpi->common;
  3089   MACROBLOCKD *const xd = &x->e_mbd;
  3090   int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  3091   int y_skip = 0, uv_skip = 0;
  3092   int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
  3093   x->skip_encode = 0;
  3094   ctx->skip = 0;
  3095   xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME;
  3096   if (bsize >= BLOCK_8X8) {
  3097     if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
  3098                                &dist_y, &y_skip, bsize, tx_cache,
  3099                                best_rd) >= best_rd) {
  3100       *returnrate = INT_MAX;
  3101       return;
  3102     }
  3103     rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
  3104                             &dist_uv, &uv_skip, bsize);
  3105   } else {
  3106     y_skip = 0;
  3107     if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
  3108                                      &dist_y, best_rd) >= best_rd) {
  3109       *returnrate = INT_MAX;
  3110       return;
  3111     }
  3112     rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
  3113                             &dist_uv, &uv_skip, BLOCK_8X8);
  3114   }
  3116   if (y_skip && uv_skip) {
  3117     *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
  3118                   vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1);
  3119     *returndist = dist_y + dist_uv;
  3120     vp9_zero(ctx->tx_rd_diff);
  3121   } else {
  3122     int i;
  3123     *returnrate = rate_y + rate_uv +
  3124         vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 0);
  3125     *returndist = dist_y + dist_uv;
  3126     if (cpi->sf.tx_size_search_method == USE_FULL_RD)
  3127       for (i = 0; i < TX_MODES; i++) {
  3128         if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)
  3129           ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
  3130         else
  3131           ctx->tx_rd_diff[i] = 0;
  3132       }
  3133   }
  3135   ctx->mic = *xd->mi_8x8[0];
  3136 }
  3138 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
  3139                                   const TileInfo *const tile,
  3140                                   int mi_row, int mi_col,
  3141                                   int *returnrate,
  3142                                   int64_t *returndistortion,
  3143                                   BLOCK_SIZE bsize,
  3144                                   PICK_MODE_CONTEXT *ctx,
  3145                                   int64_t best_rd_so_far) {
  3146   VP9_COMMON *cm = &cpi->common;
  3147   MACROBLOCKD *xd = &x->e_mbd;
  3148   MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
  3149   const struct segmentation *seg = &cm->seg;
  3150   const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
  3151   MB_PREDICTION_MODE this_mode;
  3152   MV_REFERENCE_FRAME ref_frame, second_ref_frame;
  3153   unsigned char segment_id = mbmi->segment_id;
  3154   int comp_pred, i;
  3155   int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  3156   struct buf_2d yv12_mb[4][MAX_MB_PLANE];
  3157   int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
  3158   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
  3159                                     VP9_ALT_FLAG };
  3160   int idx_list[4] = {0,
  3161                      cpi->lst_fb_idx,
  3162                      cpi->gld_fb_idx,
  3163                      cpi->alt_fb_idx};
  3164   int64_t best_rd = best_rd_so_far;
  3165   int64_t best_tx_rd[TX_MODES];
  3166   int64_t best_tx_diff[TX_MODES];
  3167   int64_t best_pred_diff[NB_PREDICTION_TYPES];
  3168   int64_t best_pred_rd[NB_PREDICTION_TYPES];
  3169   int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
  3170   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
  3171   MB_MODE_INFO best_mbmode = { 0 };
  3172   int j;
  3173   int mode_index, best_mode_index = 0;
  3174   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
  3175   vp9_prob comp_mode_p;
  3176   int64_t best_intra_rd = INT64_MAX;
  3177   int64_t best_inter_rd = INT64_MAX;
  3178   MB_PREDICTION_MODE best_intra_mode = DC_PRED;
  3179   MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
  3180   INTERPOLATION_TYPE tmp_best_filter = SWITCHABLE;
  3181   int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
  3182   int64_t dist_uv[TX_SIZES];
  3183   int skip_uv[TX_SIZES];
  3184   MB_PREDICTION_MODE mode_uv[TX_SIZES];
  3185   struct scale_factors scale_factor[4];
  3186   unsigned int ref_frame_mask = 0;
  3187   unsigned int mode_mask = 0;
  3188   int64_t mode_distortions[MB_MODE_COUNT] = {-1};
  3189   int64_t frame_distortions[MAX_REF_FRAMES] = {-1};
  3190   int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
  3191   const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
  3192   const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
  3193   int best_skip2 = 0;
  3195   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
  3197   // Wherever a flag is set, the error is much higher than that of its neighbors.
  3198   ctx->frames_with_high_error = 0;
  3199   ctx->modes_with_high_error = 0;
  3201   estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
  3202                            &comp_mode_p);
  3204   for (i = 0; i < NB_PREDICTION_TYPES; ++i)
  3205     best_pred_rd[i] = INT64_MAX;
  3206   for (i = 0; i < TX_MODES; i++)
  3207     best_tx_rd[i] = INT64_MAX;
  3208   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
  3209     best_filter_rd[i] = INT64_MAX;
  3210   for (i = 0; i < TX_SIZES; i++)
  3211     rate_uv_intra[i] = INT_MAX;
  3213   *returnrate = INT_MAX;
  3215   // Create a mask set to 1 for each reference frame used at a smaller
  3216   // partition size.
  3217   if (cpi->sf.use_avoid_tested_higherror) {
  3218     switch (block_size) {
  3219       case BLOCK_64X64:
  3220         for (i = 0; i < 4; i++) {
  3221           for (j = 0; j < 4; j++) {
  3222             ref_frame_mask |= x->mb_context[i][j].frames_with_high_error;
  3223             mode_mask |= x->mb_context[i][j].modes_with_high_error;
  3224           }
  3225         }
  3226         for (i = 0; i < 4; i++) {
  3227           ref_frame_mask |= x->sb32_context[i].frames_with_high_error;
  3228           mode_mask |= x->sb32_context[i].modes_with_high_error;
  3229         }
  3230         break;
  3231       case BLOCK_32X32:
  3232         for (i = 0; i < 4; i++) {
  3233           ref_frame_mask |=
  3234               x->mb_context[x->sb_index][i].frames_with_high_error;
  3235           mode_mask |= x->mb_context[x->sb_index][i].modes_with_high_error;
  3236         }
  3237         break;
  3238       default:
  3239         // Until we handle all block sizes, mark everything as present.
  3240         ref_frame_mask = 0;
  3241         mode_mask = 0;
  3242         break;
  3243     }
  3244     ref_frame_mask = ~ref_frame_mask;
  3245     mode_mask = ~mode_mask;
  3246   }
  3248   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
  3249     if (cpi->ref_frame_flags & flag_list[ref_frame]) {
  3250       setup_buffer_inter(cpi, x, tile, idx_list[ref_frame], ref_frame,
  3251                          block_size, mi_row, mi_col,
  3252                          frame_mv[NEARESTMV], frame_mv[NEARMV],
  3253                          yv12_mb, scale_factor);
  3254     }
  3255     frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
  3256     frame_mv[ZEROMV][ref_frame].as_int = 0;
  3257   }
  3259   for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
  3260     int mode_excluded = 0;
  3261     int64_t this_rd = INT64_MAX;
  3262     int disable_skip = 0;
  3263     int compmode_cost = 0;
  3264     int rate2 = 0, rate_y = 0, rate_uv = 0;
  3265     int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
  3266     int skippable = 0;
  3267     int64_t tx_cache[TX_MODES];
  3268     int i;
  3269     int this_skip2 = 0;
  3270     int64_t total_sse = INT_MAX;
  3271     int early_term = 0;
  3273     for (i = 0; i < TX_MODES; ++i)
  3274       tx_cache[i] = INT64_MAX;
  3276     x->skip = 0;
  3277     this_mode = vp9_mode_order[mode_index].mode;
  3278     ref_frame = vp9_mode_order[mode_index].ref_frame;
  3279     second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
  3281     // Look at the reference frame of the best mode so far and set the
  3282     // skip mask to look at a subset of the remaining modes.
  3283     if (mode_index > cpi->sf.mode_skip_start) {
  3284       if (mode_index == (cpi->sf.mode_skip_start + 1)) {
  3285         switch (vp9_mode_order[best_mode_index].ref_frame) {
  3286           case INTRA_FRAME:
  3287             cpi->mode_skip_mask = 0;
  3288             break;
  3289           case LAST_FRAME:
  3290             cpi->mode_skip_mask = LAST_FRAME_MODE_MASK;
  3291             break;
  3292           case GOLDEN_FRAME:
  3293             cpi->mode_skip_mask = GOLDEN_FRAME_MODE_MASK;
  3294             break;
  3295           case ALTREF_FRAME:
  3296             cpi->mode_skip_mask = ALT_REF_MODE_MASK;
  3297             break;
  3298           case NONE:
  3299           case MAX_REF_FRAMES:
  3300             assert(!"Invalid Reference frame");
  3301         }
  3302       }
  3303       if (cpi->mode_skip_mask & ((int64_t)1 << mode_index))
  3304         continue;
  3305     }
  3307     // Skip if the current reference frame has been masked off
  3308     if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask &&
  3309         (cpi->ref_frame_mask & (1 << ref_frame)))
  3310       continue;
  3312     // Test best rd so far against threshold for trying this mode.
  3313     if ((best_rd < ((int64_t)cpi->rd_threshes[segment_id][bsize][mode_index] *
  3314                      cpi->rd_thresh_freq_fact[bsize][mode_index] >> 5)) ||
  3315         cpi->rd_threshes[segment_id][bsize][mode_index] == INT_MAX)
  3316       continue;
  3318     // Do not allow compound prediction if the segment level reference
  3319     // frame feature is in use as in this case there can only be one reference.
  3320     if ((second_ref_frame > INTRA_FRAME) &&
  3321          vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
  3322       continue;
  3324     // Skip some checking based on small partitions' result.
  3325     if (x->fast_ms > 1 && !ref_frame)
  3326       continue;
  3327     if (x->fast_ms > 2 && ref_frame != x->subblock_ref)
  3328       continue;
  3330     if (cpi->sf.use_avoid_tested_higherror && bsize >= BLOCK_8X8) {
  3331       if (!(ref_frame_mask & (1 << ref_frame))) {
  3332         continue;
  3333       }
  3334       if (!(mode_mask & (1 << this_mode))) {
  3335         continue;
  3336       }
  3337       if (second_ref_frame != NONE
  3338           && !(ref_frame_mask & (1 << second_ref_frame))) {
  3339         continue;
  3340       }
  3341     }
  3343     mbmi->ref_frame[0] = ref_frame;
  3344     mbmi->ref_frame[1] = second_ref_frame;
  3346     if (!(ref_frame == INTRA_FRAME
  3347         || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
  3348       continue;
  3349     }
  3350     if (!(second_ref_frame == NONE
  3351         || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
  3352       continue;
  3353     }
  3355     comp_pred = second_ref_frame > INTRA_FRAME;
  3356     if (comp_pred) {
  3357       if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
  3358         if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME)
  3359           continue;
  3360       if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
  3361         if (ref_frame != best_inter_ref_frame &&
  3362             second_ref_frame != best_inter_ref_frame)
  3363           continue;
  3364     }
  3366     set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
  3367     mbmi->uv_mode = DC_PRED;
  3369     // Evaluate all sub-pel filters irrespective of whether we can use
  3370     // them for this frame.
  3371     mbmi->interp_filter = cm->mcomp_filter_type;
  3372     vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
  3374     if (comp_pred) {
  3375       if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
  3376         continue;
  3377       set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
  3379       mode_excluded = mode_excluded
  3380                          ? mode_excluded
  3381                          : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
  3382     } else {
  3383       if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
  3384         mode_excluded =
  3385             mode_excluded ?
  3386                 mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY;
  3387       }
  3388     }
  3390     // Select prediction reference frames.
  3391     for (i = 0; i < MAX_MB_PLANE; i++) {
  3392       xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
  3393       if (comp_pred)
  3394         xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
  3395     }
  3397     // If the segment reference frame feature is enabled,
  3398     // then do nothing if the current ref frame is not allowed.
  3399     if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
  3400         vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
  3401             (int)ref_frame) {
  3402       continue;
  3403     // If the segment skip feature is enabled,
  3404     // then do nothing if the current mode is not allowed.
  3405     } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
  3406                (this_mode != ZEROMV && ref_frame != INTRA_FRAME)) {
  3407       continue;
  3408     // Disable this drop out case if the ref frame
  3409     // segment level feature is enabled for this segment. This is to
  3410     // prevent the possibility that we end up unable to pick any mode.
  3411     } else if (!vp9_segfeature_active(seg, segment_id,
  3412                                       SEG_LVL_REF_FRAME)) {
  3413       // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
  3414       // unless ARNR filtering is enabled in which case we want
  3415       // an unfiltered alternative. We allow near/nearest as well
  3416       // because they may result in zero-zero MVs but be cheaper.
  3417       if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
  3418         if ((this_mode != ZEROMV &&
  3419              !(this_mode == NEARMV &&
  3420                frame_mv[NEARMV][ALTREF_FRAME].as_int == 0) &&
  3421              !(this_mode == NEARESTMV &&
  3422                frame_mv[NEARESTMV][ALTREF_FRAME].as_int == 0)) ||
  3423             ref_frame != ALTREF_FRAME) {
  3424           continue;
  3425         }
  3426       }
  3427     }
  3428     // TODO(JBB): This is to make up for the fact that we don't have SAD
  3429     // functions that work when the block size reads outside the UMV.  We
  3430     // should fix this either by making the motion search just work on
  3431     // a representative block in the boundary (first) or by implementing a
  3432     // function that does SADs when inside the border.
  3433     if (((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) &&
  3434         this_mode == NEWMV) {
  3435       continue;
  3436     }
  3438 #ifdef MODE_TEST_HIT_STATS
  3439     // TEST/DEBUG CODE
  3440     // Keep a record of the number of test hits at each size
  3441     cpi->mode_test_hits[bsize]++;
  3442 #endif
  3445     if (ref_frame == INTRA_FRAME) {
  3446       TX_SIZE uv_tx;
  3447       // Disable intra modes other than DC_PRED for blocks with low variance
  3448       // Threshold for intra skipping based on source variance
  3449       // TODO(debargha): Specialize the threshold for super block sizes
  3450       static const unsigned int skip_intra_var_thresh[BLOCK_SIZES] = {
  3451         64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  3452       };
  3453       if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
  3454           this_mode != DC_PRED &&
  3455           x->source_variance < skip_intra_var_thresh[mbmi->sb_type])
  3456         continue;
  3457       // Only search the oblique modes if the best so far is
  3458       // one of the neighboring directional modes
  3459       if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
  3460           (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
  3461         if (vp9_mode_order[best_mode_index].ref_frame > INTRA_FRAME)
  3462           continue;
  3463       }
  3464       mbmi->mode = this_mode;
  3465       if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
  3466         if (conditional_skipintra(mbmi->mode, best_intra_mode))
  3467             continue;
  3468       }
  3470       super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
  3471                       bsize, tx_cache, best_rd);
  3473       if (rate_y == INT_MAX)
  3474         continue;
  3476       uv_tx = MIN(mbmi->tx_size, max_uv_txsize_lookup[bsize]);
  3477       if (rate_uv_intra[uv_tx] == INT_MAX) {
  3478         choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[uv_tx],
  3479                              &rate_uv_tokenonly[uv_tx],
  3480                              &dist_uv[uv_tx], &skip_uv[uv_tx],
  3481                              &mode_uv[uv_tx]);
  3482       }
  3484       rate_uv = rate_uv_tokenonly[uv_tx];
  3485       distortion_uv = dist_uv[uv_tx];
  3486       skippable = skippable && skip_uv[uv_tx];
  3487       mbmi->uv_mode = mode_uv[uv_tx];
  3489       rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
  3490       if (this_mode != DC_PRED && this_mode != TM_PRED)
  3491         rate2 += intra_cost_penalty;
  3492       distortion2 = distortion_y + distortion_uv;
  3493     } else {
  3494       mbmi->mode = this_mode;
  3495       compmode_cost = vp9_cost_bit(comp_mode_p, second_ref_frame > INTRA_FRAME);
  3496       this_rd = handle_inter_mode(cpi, x, tile, bsize,
  3497                                   tx_cache,
  3498                                   &rate2, &distortion2, &skippable,
  3499                                   &rate_y, &distortion_y,
  3500                                   &rate_uv, &distortion_uv,
  3501                                   &mode_excluded, &disable_skip,
  3502                                   &tmp_best_filter, frame_mv,
  3503                                   mi_row, mi_col,
  3504                                   single_newmv, &total_sse, best_rd);
  3505       if (this_rd == INT64_MAX)
  3506         continue;
  3507     }
  3509     if (cm->comp_pred_mode == HYBRID_PREDICTION) {
  3510       rate2 += compmode_cost;
  3511     }
  3513     // Estimate the reference frame signaling cost and add it
  3514     // to the rolling cost variable.
  3515     if (second_ref_frame > INTRA_FRAME) {
  3516       rate2 += ref_costs_comp[ref_frame];
  3517     } else {
  3518       rate2 += ref_costs_single[ref_frame];
  3519     }
  3521     if (!disable_skip) {
  3522       // Test for the condition where skip block will be activated
  3523       // because there are no non zero coefficients and make any
  3524       // necessary adjustment for rate. Ignore if skip is coded at
  3525       // segment level as the cost won't have been added in.
  3526       // Is Mb level skip allowed (i.e. not coded at segment level).
  3527       const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
  3528                                                          SEG_LVL_SKIP);
  3530       if (skippable) {
  3531         // Back out the coefficient coding costs
  3532         rate2 -= (rate_y + rate_uv);
  3533         // for best yrd calculation
  3534         rate_uv = 0;
  3536         if (mb_skip_allowed) {
  3537           int prob_skip_cost;
  3539           // Cost the skip mb case
  3540           vp9_prob skip_prob =
  3541             vp9_get_pred_prob_mbskip(cm, xd);
  3543           if (skip_prob) {
  3544             prob_skip_cost = vp9_cost_bit(skip_prob, 1);
  3545             rate2 += prob_skip_cost;
  3546           }
  3547         }
  3548       } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
  3549         if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
  3550             RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
  3551           // Add in the cost of the no skip flag.
  3552           int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
  3553                                             0);
  3554           rate2 += prob_skip_cost;
  3555         } else {
  3556           // FIXME(rbultje) make this work for splitmv also
  3557           int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
  3558                                             1);
  3559           rate2 += prob_skip_cost;
  3560           distortion2 = total_sse;
  3561           assert(total_sse >= 0);
  3562           rate2 -= (rate_y + rate_uv);
  3563           rate_y = 0;
  3564           rate_uv = 0;
  3565           this_skip2 = 1;
  3566         }
  3567       } else if (mb_skip_allowed) {
  3568         // Add in the cost of the no skip flag.
  3569         int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
  3570                                           0);
  3571         rate2 += prob_skip_cost;
  3572       }
  3574       // Calculate the final RD estimate for this mode.
  3575       this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
  3576     }
  3578     // Keep record of best intra rd
  3579     if (!is_inter_block(&xd->mi_8x8[0]->mbmi) &&
  3580         this_rd < best_intra_rd) {
  3581       best_intra_rd = this_rd;
  3582       best_intra_mode = xd->mi_8x8[0]->mbmi.mode;
  3583     }
  3585     // Keep record of best inter rd with single reference
  3586     if (is_inter_block(&xd->mi_8x8[0]->mbmi) &&
  3587         !has_second_ref(&xd->mi_8x8[0]->mbmi) &&
  3588         !mode_excluded && this_rd < best_inter_rd) {
  3589       best_inter_rd = this_rd;
  3590       best_inter_ref_frame = ref_frame;
  3591     }
  3593     if (!disable_skip && ref_frame == INTRA_FRAME) {
  3594       for (i = 0; i < NB_PREDICTION_TYPES; ++i)
  3595         best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
  3596       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
  3597         best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
  3598     }
  3600     // Store the respective mode distortions for later use.
  3601     if (mode_distortions[this_mode] == -1
  3602         || distortion2 < mode_distortions[this_mode]) {
  3603       mode_distortions[this_mode] = distortion2;
  3604     }
  3605     if (frame_distortions[ref_frame] == -1
  3606         || distortion2 < frame_distortions[ref_frame]) {
  3607       frame_distortions[ref_frame] = distortion2;
  3608     }
  3610     // Did this mode help, i.e. is it the new best mode?
  3611     if (this_rd < best_rd || x->skip) {
  3612       int max_plane = MAX_MB_PLANE;
  3613       if (!mode_excluded) {
  3614         // Note index of best mode so far
  3615         best_mode_index = mode_index;
  3617         if (ref_frame == INTRA_FRAME) {
  3618           /* required for left and above block mv */
  3619           mbmi->mv[0].as_int = 0;
  3620           max_plane = 1;
  3621         }
  3623         *returnrate = rate2;
  3624         *returndistortion = distortion2;
  3625         best_rd = this_rd;
  3626         best_mbmode = *mbmi;
  3627         best_skip2 = this_skip2;
  3628         if (!x->select_txfm_size)
  3629           swap_block_ptr(x, ctx, max_plane);
  3630         vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
  3631                    sizeof(uint8_t) * ctx->num_4x4_blk);
  3633         // TODO(debargha): enhance this test with a better distortion prediction
  3634         // based on qp, activity mask and history
  3635         if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
  3636             (mode_index > MIN_EARLY_TERM_INDEX)) {
  3637           const int qstep = xd->plane[0].dequant[1];
  3638           // TODO(debargha): Enhance this by specializing for each mode_index
  3639           int scale = 4;
  3640           if (x->source_variance < UINT_MAX) {
  3641             const int var_adjust = (x->source_variance < 16);
  3642             scale -= var_adjust;
  3643           }
  3644           if (ref_frame > INTRA_FRAME &&
  3645               distortion2 * scale < qstep * qstep) {
  3646             early_term = 1;
  3647           }
  3648         }
  3649       }
  3650     }
  3652     /* keep record of best compound/single-only prediction */
  3653     if (!disable_skip && ref_frame != INTRA_FRAME) {
  3654       int single_rd, hybrid_rd, single_rate, hybrid_rate;
  3656       if (cm->comp_pred_mode == HYBRID_PREDICTION) {
  3657         single_rate = rate2 - compmode_cost;
  3658         hybrid_rate = rate2;
  3659       } else {
  3660         single_rate = rate2;
  3661         hybrid_rate = rate2 + compmode_cost;
  3662       }
  3664       single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
  3665       hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
  3667       if (second_ref_frame <= INTRA_FRAME &&
  3668           single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
  3669         best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
  3670       } else if (second_ref_frame > INTRA_FRAME &&
  3671                  single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
  3672         best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
  3673       }
  3674       if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
  3675         best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
  3676     }
  3678     /* keep record of best filter type */
  3679     if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
  3680         cm->mcomp_filter_type != BILINEAR) {
  3681       int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
  3682                               SWITCHABLE_FILTERS : cm->mcomp_filter_type];
  3683       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
  3684         int64_t adj_rd;
  3685         // In cases of poor prediction, filter_cache[] can contain really big
  3686         // values, which actually are bigger than this_rd itself. This can
  3687         // cause negative best_filter_rd[] values, which is obviously silly.
  3688         // Therefore, if filter_cache < ref, we do an adjusted calculation.
  3689         if (cpi->rd_filter_cache[i] >= ref) {
  3690           adj_rd = this_rd + cpi->rd_filter_cache[i] - ref;
  3691         } else {
  3692           // FIXME(rbultje) do this for comp pred also
  3693           //
  3694           // To prevent out-of-range computation in
  3695           //    adj_rd = cpi->rd_filter_cache[i] * this_rd / ref
  3696           // cpi->rd_filter_cache[i] / ref is converted to a 256 based ratio.
  3697           int tmp = cpi->rd_filter_cache[i] * 256 / ref;
  3698           adj_rd = (this_rd * tmp) >> 8;
  3699         }
  3700         best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
  3701       }
  3702     }
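           /* Worked example of the 256-based ratio above: if rd_filter_cache[i]
            * is three quarters of ref, tmp = 192 and adj_rd = (this_rd * 192)
            * >> 8 = 0.75 * this_rd, avoiding the potentially overflowing
            * product of two full-range 64-bit RD values. */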
  3704     /* keep record of best txfm size */
  3705     if (bsize < BLOCK_32X32) {
  3706       if (bsize < BLOCK_16X16)
  3707         tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
  3709       tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
  3710     }
  3711     if (!mode_excluded && this_rd != INT64_MAX) {
  3712       for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
  3713         int64_t adj_rd = INT64_MAX;
  3714         adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
  3716         if (adj_rd < best_tx_rd[i])
  3717           best_tx_rd[i] = adj_rd;
  3718       }
  3719     }
  3721     if (early_term)
  3722       break;
  3724     if (x->skip && !comp_pred)
  3725       break;
  3726   }
  3728   if (best_rd >= best_rd_so_far)
  3729     return INT64_MAX;
  3731   // If we used an estimate for the uv intra rd in the loop above...
  3732   if (cpi->sf.use_uv_intra_rd_estimate) {
  3733     // Do Intra UV best rd mode selection if best mode choice above was intra.
  3734     if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) {
  3735       TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
  3736       rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
  3737                               &rate_uv_tokenonly[uv_tx_size],
  3738                               &dist_uv[uv_tx_size],
  3739                               &skip_uv[uv_tx_size],
  3740                               bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
  3741     }
  3742   }
  3744   // If we are using reference masking and the set mask flag is set then
  3745   // create the reference frame mask.
  3746   if (cpi->sf.reference_masking && cpi->set_ref_frame_mask)
  3747     cpi->ref_frame_mask = ~(1 << vp9_mode_order[best_mode_index].ref_frame);
  3749   // Flag all modes that have a distortion that's > 2x the best we found at
  3750   // this level.
  3751   for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) {
  3752     if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV)
  3753       continue;
  3755     if (mode_distortions[mode_index] > 2 * *returndistortion) {
  3756       ctx->modes_with_high_error |= (1 << mode_index);
  3757     }
  3758   }
  3760   // Flag all ref frames that have a distortion that's > 2x the best we found at
  3761   // this level.
  3762   for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
  3763     if (frame_distortions[ref_frame] > 2 * *returndistortion) {
  3764       ctx->frames_with_high_error |= (1 << ref_frame);
  3765     }
  3766   }
  3768   assert((cm->mcomp_filter_type == SWITCHABLE) ||
  3769          (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
  3770          (best_mbmode.ref_frame[0] == INTRA_FRAME));
  3772   // Updating rd_thresh_freq_fact[] here means that the different
  3773   // partition/block sizes are handled independently based on the best
  3774   // choice for the current partition. It may well be better to keep a scaled
  3775   // best rd so far value and update rd_thresh_freq_fact based on the mode/size
  3776   // combination that wins out.
  3777   if (cpi->sf.adaptive_rd_thresh) {
  3778     for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
  3779       if (mode_index == best_mode_index) {
  3780         cpi->rd_thresh_freq_fact[bsize][mode_index] -=
  3781           (cpi->rd_thresh_freq_fact[bsize][mode_index] >> 3);
  3782       } else {
  3783         cpi->rd_thresh_freq_fact[bsize][mode_index] += RD_THRESH_INC;
  3784         if (cpi->rd_thresh_freq_fact[bsize][mode_index] >
  3785             (cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT)) {
  3786           cpi->rd_thresh_freq_fact[bsize][mode_index] =
  3787             cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT;
  3788         }
  3789       }
  3790     }
  3791   }
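         /* Net effect of the update above: the winning mode's factor decays
          * multiplicatively (losing 1/8 of itself per win) while every other
          * mode's factor climbs by RD_THRESH_INC toward its cap; the
          * early-loop test (best_rd < rd_threshes * factor >> 5) then prunes
          * habitual losers sooner while recent winners stay cheap to re-test. */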
  3793   // macroblock modes
  3794   *mbmi = best_mbmode;
  3795   x->skip |= best_skip2;
  3797   for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
  3798     if (best_pred_rd[i] == INT64_MAX)
  3799       best_pred_diff[i] = INT_MIN;
  3800     else
  3801       best_pred_diff[i] = best_rd - best_pred_rd[i];
  3802   }
  3804   if (!x->skip) {
  3805     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
  3806       if (best_filter_rd[i] == INT64_MAX)
  3807         best_filter_diff[i] = 0;
  3808       else
  3809         best_filter_diff[i] = best_rd - best_filter_rd[i];
  3810     }
  3811     if (cm->mcomp_filter_type == SWITCHABLE)
  3812       assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
  3813   } else {
  3814     vp9_zero(best_filter_diff);
  3815   }
  3817   if (!x->skip) {
  3818     for (i = 0; i < TX_MODES; i++) {
  3819       if (best_tx_rd[i] == INT64_MAX)
  3820         best_tx_diff[i] = 0;
  3821       else
  3822         best_tx_diff[i] = best_rd - best_tx_rd[i];
  3823     }
  3824   } else {
  3825     vp9_zero(best_tx_diff);
  3826   }
  3828   set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
  3829                     scale_factor);
  3830   store_coding_context(x, ctx, best_mode_index,
  3831                        &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
  3832                        &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
  3833                                       mbmi->ref_frame[1]][0],
  3834                        best_pred_diff, best_tx_diff, best_filter_diff);
  3836   return best_rd;
  3837 }
  3840 int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
  3841                                       const TileInfo *const tile,
  3842                                       int mi_row, int mi_col,
  3843                                       int *returnrate,
  3844                                       int64_t *returndistortion,
  3845                                       BLOCK_SIZE bsize,
  3846                                       PICK_MODE_CONTEXT *ctx,
  3847                                       int64_t best_rd_so_far) {
  3848   VP9_COMMON *cm = &cpi->common;
  3849   MACROBLOCKD *xd = &x->e_mbd;
  3850   MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
  3851   const struct segmentation *seg = &cm->seg;
  3852   const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
  3853   MV_REFERENCE_FRAME ref_frame, second_ref_frame;
  3854   unsigned char segment_id = mbmi->segment_id;
  3855   int comp_pred, i;
  3856   int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  3857   struct buf_2d yv12_mb[4][MAX_MB_PLANE];
  3858   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
  3859                                     VP9_ALT_FLAG };
  3860   int idx_list[4] = {0,
  3861                      cpi->lst_fb_idx,
  3862                      cpi->gld_fb_idx,
  3863                      cpi->alt_fb_idx};
  3864   int64_t best_rd = best_rd_so_far;
  3865   int64_t best_yrd = best_rd_so_far;  // FIXME(rbultje) more precise
  3866   int64_t best_tx_rd[TX_MODES];
  3867   int64_t best_tx_diff[TX_MODES];
  3868   int64_t best_pred_diff[NB_PREDICTION_TYPES];
  3869   int64_t best_pred_rd[NB_PREDICTION_TYPES];
  3870   int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
  3871   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
  3872   MB_MODE_INFO best_mbmode = { 0 };
  3873   int mode_index, best_mode_index = 0;
  3874   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
  3875   vp9_prob comp_mode_p;
  3876   int64_t best_inter_rd = INT64_MAX;
  3877   MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
  3878   INTERPOLATION_TYPE tmp_best_filter = SWITCHABLE;
  3879   int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
  3880   int64_t dist_uv[TX_SIZES];
  3881   int skip_uv[TX_SIZES];
  3882   MB_PREDICTION_MODE mode_uv[TX_SIZES] = { 0 };
  3883   struct scale_factors scale_factor[4];
  3884   unsigned int ref_frame_mask = 0;
  3885   unsigned int mode_mask = 0;
  3886   int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
  3887                                              cpi->common.y_dc_delta_q);
  3888   int_mv seg_mvs[4][MAX_REF_FRAMES];
  3889   b_mode_info best_bmodes[4];
  3890   int best_skip2 = 0;
  3892   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
  3893   vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
  3895   for (i = 0; i < 4; i++) {
  3896     int j;
  3897     for (j = 0; j < MAX_REF_FRAMES; j++)
  3898       seg_mvs[i][j].as_int = INVALID_MV;
  3899   }
  3901   estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
  3902                            &comp_mode_p);
  3904   for (i = 0; i < NB_PREDICTION_TYPES; ++i)
  3905     best_pred_rd[i] = INT64_MAX;
  3906   for (i = 0; i < TX_MODES; i++)
  3907     best_tx_rd[i] = INT64_MAX;
  3908   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
  3909     best_filter_rd[i] = INT64_MAX;
  3910   for (i = 0; i < TX_SIZES; i++)
  3911     rate_uv_intra[i] = INT_MAX;
  3913   *returnrate = INT_MAX;
  3915   // Create a mask set to 1 for each reference frame used by a smaller
  3916   // resolution.
  3917   if (cpi->sf.use_avoid_tested_higherror) {
  3918     ref_frame_mask = 0;
  3919     mode_mask = 0;
  3920     ref_frame_mask = ~ref_frame_mask;
  3921     mode_mask = ~mode_mask;
  3922   }
  3924   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
  3925     if (cpi->ref_frame_flags & flag_list[ref_frame]) {
  3926       setup_buffer_inter(cpi, x, tile, idx_list[ref_frame], ref_frame,
  3927                          block_size, mi_row, mi_col,
  3928                          frame_mv[NEARESTMV], frame_mv[NEARMV],
  3929                          yv12_mb, scale_factor);
  3930     }
  3931     frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
  3932     frame_mv[ZEROMV][ref_frame].as_int = 0;
  3933   }
  3935   for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
  3936     int mode_excluded = 0;
  3937     int64_t this_rd = INT64_MAX;
  3938     int disable_skip = 0;
  3939     int compmode_cost = 0;
  3940     int rate2 = 0, rate_y = 0, rate_uv = 0;
  3941     int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
  3942     int skippable = 0;
  3943     int64_t tx_cache[TX_MODES];
  3944     int i;
  3945     int this_skip2 = 0;
  3946     int64_t total_sse = INT_MAX;
  3947     int early_term = 0;
  3949     for (i = 0; i < TX_MODES; ++i)
  3950       tx_cache[i] = INT64_MAX;
  3952     x->skip = 0;
  3953     ref_frame = vp9_ref_order[mode_index].ref_frame;
  3954     second_ref_frame = vp9_ref_order[mode_index].second_ref_frame;
  3956     // Look at the reference frame of the best mode so far and set the
  3957     // skip mask to look at a subset of the remaining modes.
  3958     if (mode_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) {
  3959       if (mode_index == 3) {
  3960         switch (vp9_ref_order[best_mode_index].ref_frame) {
  3961           case INTRA_FRAME:
  3962             cpi->mode_skip_mask = 0;
  3963             break;
  3964           case LAST_FRAME:
  3965             cpi->mode_skip_mask = 0x0010;
  3966             break;
  3967           case GOLDEN_FRAME:
  3968             cpi->mode_skip_mask = 0x0008;
  3969             break;
  3970           case ALTREF_FRAME:
  3971             cpi->mode_skip_mask = 0x0000;
  3972             break;
  3973           case NONE:
  3974           case MAX_REF_FRAMES:
  3975             assert(!"Invalid Reference frame");
  3976         }
  3977       }
  3978       if (cpi->mode_skip_mask & ((int64_t)1 << mode_index))
  3979         continue;
  3980     }
    // Skip if the current reference frame has been masked off
    if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask &&
        (cpi->ref_frame_mask & (1 << ref_frame)))
      continue;

    // Test best rd so far against threshold for trying this mode.
    if ((best_rd <
         ((int64_t)cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] *
          cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 5)) ||
        cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX)
      continue;
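    // (rd_thresh_sub8x8 holds the static per-mode threshold and
    // rd_thresh_freq_sub8x8 an adaptive frequency factor updated near the
    // bottom of this function; their product, rescaled by >> 5, prunes
    // modes that rarely win before any search is done for them.)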
    // Do not allow compound prediction if the segment level reference
    // frame feature is in use as in this case there can only be one reference.
    if ((second_ref_frame > INTRA_FRAME) &&
         vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
      continue;

    mbmi->ref_frame[0] = ref_frame;
    mbmi->ref_frame[1] = second_ref_frame;

    if (!(ref_frame == INTRA_FRAME
        || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
      continue;
    }
    if (!(second_ref_frame == NONE
        || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
      continue;
    }
    comp_pred = second_ref_frame > INTRA_FRAME;
    if (comp_pred) {
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
        if (vp9_ref_order[best_mode_index].ref_frame == INTRA_FRAME)
          continue;
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
        if (ref_frame != best_inter_ref_frame &&
            second_ref_frame != best_inter_ref_frame)
          continue;
    }
    // TODO(jingning, jkoleszar): scaling reference frame not supported for
    // sub8x8 blocks.
    if (ref_frame > 0 &&
        vp9_is_scaled(scale_factor[ref_frame].sfc))
      continue;

    if (second_ref_frame > 0 &&
        vp9_is_scaled(scale_factor[second_ref_frame].sfc))
      continue;

    set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
    mbmi->uv_mode = DC_PRED;

    // Evaluate all sub-pel filters irrespective of whether we can use
    // them for this frame.
    mbmi->interp_filter = cm->mcomp_filter_type;
    vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
    if (comp_pred) {
      if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
        continue;
      set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);

      mode_excluded = mode_excluded
                         ? mode_excluded
                         : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
    } else {
      if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
        mode_excluded =
            mode_excluded ?
                mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY;
      }
    }
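    // (A mode flagged as excluded is still costed, so it can contribute to
    // the prediction and filter records below, but it is never kept as the
    // best mode for this block.)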
    // Select prediction reference frames.
    for (i = 0; i < MAX_MB_PLANE; i++) {
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
      if (comp_pred)
        xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
    }
    // If the segment reference frame feature is enabled,
    // then do nothing if the current ref frame is not allowed.
    if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
            (int)ref_frame) {
      continue;
    // If the segment skip feature is enabled,
    // then do nothing if the current mode is not allowed.
    } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
               ref_frame != INTRA_FRAME) {
      continue;
    // Disable this drop out case if the ref frame
    // segment level feature is enabled for this segment. This is to
    // prevent the possibility that we end up unable to pick any mode.
    } else if (!vp9_segfeature_active(seg, segment_id,
                                      SEG_LVL_REF_FRAME)) {
      // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
      // unless ARNR filtering is enabled in which case we want
      // an unfiltered alternative. We allow near/nearest as well
      // because they may result in zero-zero MVs but be cheaper.
      if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
        continue;
    }
#ifdef MODE_TEST_HIT_STATS
    // TEST/DEBUG CODE
    // Keep a record of the number of test hits at each size
    cpi->mode_test_hits[bsize]++;
#endif
    if (ref_frame == INTRA_FRAME) {
      int rate;
      mbmi->tx_size = TX_4X4;
      if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
                                       &distortion_y, best_rd) >= best_rd)
        continue;
      rate2 += rate;
      rate2 += intra_cost_penalty;
      distortion2 += distortion_y;

      if (rate_uv_intra[TX_4X4] == INT_MAX) {
        choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[TX_4X4],
                             &rate_uv_tokenonly[TX_4X4],
                             &dist_uv[TX_4X4], &skip_uv[TX_4X4],
                             &mode_uv[TX_4X4]);
      }
      rate2 += rate_uv_intra[TX_4X4];
      rate_uv = rate_uv_tokenonly[TX_4X4];
      distortion2 += dist_uv[TX_4X4];
      distortion_uv = dist_uv[TX_4X4];
      mbmi->uv_mode = mode_uv[TX_4X4];
      tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
      for (i = 0; i < TX_MODES; ++i)
        tx_cache[i] = tx_cache[ONLY_4X4];
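      // (The chroma intra search result is cached in rate_uv_intra[TX_4X4]
      // so it runs at most once per block; sub-8x8 blocks always use the
      // 4x4 transform, so only the TX_4X4 entries are relevant here.)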
    } else {
      int rate;
      int64_t distortion;
      int64_t this_rd_thresh;
      int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
      int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
      int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
      int tmp_best_skippable = 0;
      int switchable_filter_index;
      int_mv *second_ref = comp_pred ?
                             &mbmi->ref_mvs[second_ref_frame][0] : NULL;
      b_mode_info tmp_best_bmodes[16];
      MB_MODE_INFO tmp_best_mbmode;
      BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
      int pred_exists = 0;
      int uv_skippable;

      this_rd_thresh = (ref_frame == LAST_FRAME) ?
          cpi->rd_thresh_sub8x8[segment_id][bsize][THR_LAST] :
          cpi->rd_thresh_sub8x8[segment_id][bsize][THR_ALTR];
      this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
          cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh;
      xd->mi_8x8[0]->mbmi.tx_size = TX_4X4;
      cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX;
      if (cm->mcomp_filter_type != BILINEAR) {
        tmp_best_filter = EIGHTTAP;
        if (x->source_variance <
            cpi->sf.disable_filter_search_var_thresh) {
          tmp_best_filter = EIGHTTAP;
          vp9_zero(cpi->rd_filter_cache);
        } else {
          for (switchable_filter_index = 0;
               switchable_filter_index < SWITCHABLE_FILTERS;
               ++switchable_filter_index) {
            int newbest, rs;
            int64_t rs_rd;
            mbmi->interp_filter = switchable_filter_index;
            vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);

            tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
                                                 &mbmi->ref_mvs[ref_frame][0],
                                                 second_ref,
                                                 best_yrd,
                                                 &rate, &rate_y, &distortion,
                                                 &skippable, &total_sse,
                                                 (int)this_rd_thresh, seg_mvs,
                                                 bsi, switchable_filter_index,
                                                 mi_row, mi_col);

            if (tmp_rd == INT64_MAX)
              continue;
            cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
            rs = get_switchable_rate(x);
            rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
            cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
                MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
                    tmp_rd + rs_rd);
            if (cm->mcomp_filter_type == SWITCHABLE)
              tmp_rd += rs_rd;

            newbest = (tmp_rd < tmp_best_rd);
            if (newbest) {
              tmp_best_filter = mbmi->interp_filter;
              tmp_best_rd = tmp_rd;
            }
            if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
                (mbmi->interp_filter == cm->mcomp_filter_type &&
                 cm->mcomp_filter_type != SWITCHABLE)) {
              tmp_best_rdu = tmp_rd;
              tmp_best_rate = rate;
              tmp_best_ratey = rate_y;
              tmp_best_distortion = distortion;
              tmp_best_sse = total_sse;
              tmp_best_skippable = skippable;
              tmp_best_mbmode = *mbmi;
              for (i = 0; i < 4; i++) {
                tmp_best_bmodes[i] = xd->mi_8x8[0]->bmi[i];
                x->zcoeff_blk[TX_4X4][i] = !xd->plane[0].eobs[i];
              }
              pred_exists = 1;
              if (switchable_filter_index == 0 &&
                  cpi->sf.use_rd_breakout &&
                  best_rd < INT64_MAX) {
                if (tmp_best_rdu / 2 > best_rd) {
                  // Skip searching the other filters if the first one is
                  // already substantially larger than the best so far.
                  tmp_best_filter = mbmi->interp_filter;
                  tmp_best_rdu = INT64_MAX;
                  break;
                }
              }
            }
          }  // switchable_filter_index loop
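          // (rd_filter_cache[i] now holds the best rd achieved with filter
          // i, and the extra SWITCHABLE_FILTERS slot the best rd including
          // the cost of signalling a switchable filter; both feed the
          // best_filter_rd records near the end of the mode loop.)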
        }
      }

      if (tmp_best_rdu == INT64_MAX)
        continue;
      mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
                             tmp_best_filter : cm->mcomp_filter_type);
      vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
      if (!pred_exists) {
        // Handles the special case when a filter that is not in the
        // switchable list (bilinear, 6-tap) is indicated at the frame level
        tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
                     &mbmi->ref_mvs[ref_frame][0],
                     second_ref,
                     best_yrd,
                     &rate, &rate_y, &distortion,
                     &skippable, &total_sse,
                     (int)this_rd_thresh, seg_mvs,
                     bsi, 0,
                     mi_row, mi_col);
        if (tmp_rd == INT64_MAX)
          continue;
      } else {
        if (cpi->common.mcomp_filter_type == SWITCHABLE) {
          int rs = get_switchable_rate(x);
          tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
        }
        tmp_rd = tmp_best_rdu;
        total_sse = tmp_best_sse;
        rate = tmp_best_rate;
        rate_y = tmp_best_ratey;
        distortion = tmp_best_distortion;
        skippable = tmp_best_skippable;
        *mbmi = tmp_best_mbmode;
        for (i = 0; i < 4; i++)
          xd->mi_8x8[0]->bmi[i] = tmp_best_bmodes[i];
      }
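      // (When reusing the cached result, the switchable-filter signalling
      // cost folded into tmp_best_rdu during the search is backed out
      // here; it is re-added to rate2 just below, so it is not counted
      // twice.)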
      rate2 += rate;
      distortion2 += distortion;

      if (cpi->common.mcomp_filter_type == SWITCHABLE)
        rate2 += get_switchable_rate(x);

      if (!mode_excluded) {
        if (comp_pred)
          mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
        else
          mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
      }
      compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);

      tmp_best_rdu = best_rd -
          MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
              RDCOST(x->rdmult, x->rddiv, 0, total_sse));
      if (tmp_best_rdu > 0) {
        // If even the 'Y' rd value of split is higher than best so far
        // then don't bother looking at UV
        vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
                                        BLOCK_8X8);
        super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
                         &uv_sse, BLOCK_8X8, tmp_best_rdu);
        if (rate_uv == INT_MAX)
          continue;
        rate2 += rate_uv;
        distortion2 += distortion_uv;
        skippable = skippable && uv_skippable;
        total_sse += uv_sse;

        tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
        for (i = 0; i < TX_MODES; ++i)
          tx_cache[i] = tx_cache[ONLY_4X4];
      }
    }
    if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
      rate2 += compmode_cost;
    }

    // Estimate the reference frame signaling cost and add it
    // to the rolling cost variable.
    if (second_ref_frame > INTRA_FRAME) {
      rate2 += ref_costs_comp[ref_frame];
    } else {
      rate2 += ref_costs_single[ref_frame];
    }
    if (!disable_skip) {
      // Test for the condition where skip block will be activated
      // because there are no non zero coefficients and make any
      // necessary adjustment for rate. Ignore if skip is coded at
      // segment level as the cost won't have been added in.
      // Is Mb level skip allowed (i.e. not coded at segment level).
      const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
                                                         SEG_LVL_SKIP);

      if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
        if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
            RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
          // Add in the cost of the no skip flag.
          int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
                                            0);
          rate2 += prob_skip_cost;
        } else {
          // FIXME(rbultje) make this work for splitmv also
          int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
                                            1);
          rate2 += prob_skip_cost;
          distortion2 = total_sse;
          assert(total_sse >= 0);
          rate2 -= (rate_y + rate_uv);
          rate_y = 0;
          rate_uv = 0;
          this_skip2 = 1;
        }
      } else if (mb_skip_allowed) {
        // Add in the cost of the no skip flag.
        int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
                                          0);
        rate2 += prob_skip_cost;
      }

      // Calculate the final RD estimate for this mode.
      this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
    }
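    // (The comparison above weighs coding the residual, at rate_y +
    // rate_uv, against skipping it, which saves that rate but takes the
    // full reconstruction error total_sse as the distortion.)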
    // Keep record of best inter rd with single reference
    if (xd->mi_8x8[0]->mbmi.ref_frame[0] > INTRA_FRAME &&
        xd->mi_8x8[0]->mbmi.ref_frame[1] == NONE &&
        !mode_excluded &&
        this_rd < best_inter_rd) {
      best_inter_rd = this_rd;
      best_inter_ref_frame = ref_frame;
    }

    if (!disable_skip && ref_frame == INTRA_FRAME) {
      for (i = 0; i < NB_PREDICTION_TYPES; ++i)
        best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
        best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
    }
    // Did this mode help, i.e. is it the new best mode?
    if (this_rd < best_rd || x->skip) {
      if (!mode_excluded) {
        int max_plane = MAX_MB_PLANE;
        // Note index of best mode so far
        best_mode_index = mode_index;

        if (ref_frame == INTRA_FRAME) {
          /* required for left and above block mv */
          mbmi->mv[0].as_int = 0;
          max_plane = 1;
        }

        *returnrate = rate2;
        *returndistortion = distortion2;
        best_rd = this_rd;
        best_yrd = best_rd -
                   RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
        best_mbmode = *mbmi;
        best_skip2 = this_skip2;
        if (!x->select_txfm_size)
          swap_block_ptr(x, ctx, max_plane);
        vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
                   sizeof(uint8_t) * ctx->num_4x4_blk);

        for (i = 0; i < 4; i++)
          best_bmodes[i] = xd->mi_8x8[0]->bmi[i];

        // TODO(debargha): enhance this test with a better distortion prediction
        // based on qp, activity mask and history
        if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
            (mode_index > MIN_EARLY_TERM_INDEX)) {
          const int qstep = xd->plane[0].dequant[1];
          // TODO(debargha): Enhance this by specializing for each mode_index
          int scale = 4;
          if (x->source_variance < UINT_MAX) {
            const int var_adjust = (x->source_variance < 16);
            scale -= var_adjust;
          }
          if (ref_frame > INTRA_FRAME &&
              distortion2 * scale < qstep * qstep) {
            early_term = 1;
          }
        }
      }
    }
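    // (Early termination: once the winning mode's distortion is small
    // relative to the squared quantizer step, searching further modes is
    // unlikely to change the outcome, so the mode loop is cut short.)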
    /* keep record of best compound/single-only prediction */
    if (!disable_skip && ref_frame != INTRA_FRAME) {
      int single_rd, hybrid_rd, single_rate, hybrid_rate;

      if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
        single_rate = rate2 - compmode_cost;
        hybrid_rate = rate2;
      } else {
        single_rate = rate2;
        hybrid_rate = rate2 + compmode_cost;
      }

      single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
      hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);

      if (second_ref_frame <= INTRA_FRAME &&
          single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
        best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
      } else if (second_ref_frame > INTRA_FRAME &&
                 single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
        best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
      }
      if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
        best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
    }
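    // (Presumably these records, returned via best_pred_diff, let the
    // caller weigh single, compound and hybrid prediction at the frame
    // level.)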
    /* keep record of best filter type */
    if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
        cm->mcomp_filter_type != BILINEAR) {
      int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
                              SWITCHABLE_FILTERS : cm->mcomp_filter_type];
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
        int64_t adj_rd;
        // In cases of poor prediction, filter_cache[] can contain really big
        // values, which actually are bigger than this_rd itself. This can
        // cause negative best_filter_rd[] values, which is obviously silly.
        // Therefore, if filter_cache < ref, we do an adjusted calculation.
        if (cpi->rd_filter_cache[i] >= ref)
          adj_rd = this_rd + cpi->rd_filter_cache[i] - ref;
        else  // FIXME(rbultje) do this for comppred also
          adj_rd = this_rd - (ref - cpi->rd_filter_cache[i]) * this_rd / ref;
        best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
      }
    }
    /* keep record of best txfm size */
    if (bsize < BLOCK_32X32) {
      if (bsize < BLOCK_16X16) {
        tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4];
        tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
      }
      tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
    }
    if (!mode_excluded && this_rd != INT64_MAX) {
      for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
        int64_t adj_rd = INT64_MAX;
        if (ref_frame > INTRA_FRAME)
          adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
        else
          adj_rd = this_rd;

        if (adj_rd < best_tx_rd[i])
          best_tx_rd[i] = adj_rd;
      }
    }

    if (early_term)
      break;

    if (x->skip && !comp_pred)
      break;
  }
  if (best_rd >= best_rd_so_far)
    return INT64_MAX;
  // If we used an estimate for the uv intra rd in the loop above, redo the
  // proper UV intra rd selection now.
  if (cpi->sf.use_uv_intra_rd_estimate) {
    // Do Intra UV best rd mode selection if best mode choice above was intra.
    if (vp9_ref_order[best_mode_index].ref_frame == INTRA_FRAME) {
      TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
      rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
                              &rate_uv_tokenonly[uv_tx_size],
                              &dist_uv[uv_tx_size],
                              &skip_uv[uv_tx_size],
                              BLOCK_8X8);
    }
  }
  // If we are using reference masking and the set mask flag is set then
  // create the reference frame mask.
  if (cpi->sf.reference_masking && cpi->set_ref_frame_mask)
    cpi->ref_frame_mask = ~(1 << vp9_ref_order[best_mode_index].ref_frame);

  if (best_rd == INT64_MAX && bsize < BLOCK_8X8) {
    *returnrate = INT_MAX;
    *returndistortion = INT_MAX;
    return best_rd;
  }

  assert((cm->mcomp_filter_type == SWITCHABLE) ||
         (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
         (best_mbmode.ref_frame[0] == INTRA_FRAME));
  // Updating rd_thresh_freq_fact[] here means that the different
  // partition/block sizes are handled independently based on the best
  // choice for the current partition. It may well be better to keep a scaled
  // best rd so far value and update rd_thresh_freq_fact based on the mode/size
  // combination that wins out.
  if (cpi->sf.adaptive_rd_thresh) {
    for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
      if (mode_index == best_mode_index) {
        cpi->rd_thresh_freq_sub8x8[bsize][mode_index] -=
          (cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 3);
      } else {
        cpi->rd_thresh_freq_sub8x8[bsize][mode_index] += RD_THRESH_INC;
        if (cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >
            (cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT)) {
          cpi->rd_thresh_freq_sub8x8[bsize][mode_index] =
            cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT;
        }
      }
    }
  }
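  // (The winning mode's frequency factor decays by 1/8 of itself while the
  // losing modes' factors grow toward a capped maximum, so modes that
  // rarely win face progressively stricter rd thresholds on later blocks.)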
  // macroblock modes
  *mbmi = best_mbmode;
  x->skip |= best_skip2;
  if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
    for (i = 0; i < 4; i++)
      xd->mi_8x8[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
  } else {
    for (i = 0; i < 4; ++i)
      vpx_memcpy(&xd->mi_8x8[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));

    mbmi->mv[0].as_int = xd->mi_8x8[0]->bmi[3].as_mv[0].as_int;
    mbmi->mv[1].as_int = xd->mi_8x8[0]->bmi[3].as_mv[1].as_int;
  }
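  // (bmi[3] is the bottom-right 4x4 sub-block; its motion vector is copied
  // up as the block-level mv, presumably for use as reference context by
  // subsequently coded blocks.)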
  for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
    if (best_pred_rd[i] == INT64_MAX)
      best_pred_diff[i] = INT_MIN;
    else
      best_pred_diff[i] = best_rd - best_pred_rd[i];
  }
  if (!x->skip) {
    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
      if (best_filter_rd[i] == INT64_MAX)
        best_filter_diff[i] = 0;
      else
        best_filter_diff[i] = best_rd - best_filter_rd[i];
    }
    if (cm->mcomp_filter_type == SWITCHABLE)
      assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
  } else {
    vp9_zero(best_filter_diff);
  }
  if (!x->skip) {
    for (i = 0; i < TX_MODES; i++) {
      if (best_tx_rd[i] == INT64_MAX)
        best_tx_diff[i] = 0;
      else
        best_tx_diff[i] = best_rd - best_tx_rd[i];
    }
  } else {
    vp9_zero(best_tx_diff);
  }
  set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
                    scale_factor);
  store_coding_context(x, ctx, best_mode_index,
                       &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
                       &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
                                      mbmi->ref_frame[1]][0],
                       best_pred_diff, best_tx_diff, best_filter_diff);

  return best_rd;
}
