Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>

#include "vp9/common/vp9_pragmas.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_common.h"
#define INVALID_MV 0x80008000

/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

#define LAST_FRAME_MODE_MASK 0xFFEDCD60
#define GOLDEN_FRAME_MODE_MASK 0xFFDA3BB0
#define ALT_REF_MODE_MASK 0xFFC648D0

#define MIN_EARLY_TERM_INDEX 3
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},

  {DC_PRED,   INTRA_FRAME,  NONE},

  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},

  {NEARMV,    LAST_FRAME,   NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},

  {TM_PRED,   INTRA_FRAME,  NONE},

  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},
  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},

  {ZEROMV,    LAST_FRAME,   NONE},
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {ZEROMV,    ALTREF_FRAME, NONE},
  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},

  {H_PRED,    INTRA_FRAME,  NONE},
  {V_PRED,    INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D207_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
};

const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  {LAST_FRAME,   NONE},
  {GOLDEN_FRAME, NONE},
  {ALTREF_FRAME, NONE},
  {LAST_FRAME,   ALTREF_FRAME},
  {GOLDEN_FRAME, ALTREF_FRAME},
  {INTRA_FRAME,  NONE},
};
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
static int rd_thresh_block_size_factor[BLOCK_SIZES] =
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};
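// Worked example (illustrative, not from upstream): the factors are in
// quarters and set_block_thresholds() below divides by 4, so an 8x8 block
// (factor 4) keeps the 8x8 baseline unchanged (4/4 = x1), a 4x4 block
// (factor 2) halves it (x0.5), and a 64x64 block (factor 32) scales it by
// x8, matching the "2 = x0.5, 32 = x8" note above.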
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1
#define RD_THRESH_POW 1.25
#define RD_MULT_EPB_RATIO 64

#define MV_COST_WEIGHT 108
#define MV_COST_WEIGHT_SUB 120
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
  int i, j, k, l;
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; t++)
    for (i = 0; i < BLOCK_TYPES; i++)
      for (j = 0; j < REF_TYPES; j++)
        for (k = 0; k < COEF_BANDS; k++)
          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
                            vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                 vp9_coef_tree);
            assert(c[t][i][j][k][0][l][DCT_EOB_TOKEN] ==
                   c[t][i][j][k][1][l][DCT_EOB_TOKEN]);
          }
}
static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};

// 3* dc_qlookup[Q]*dc_qlookup[Q];

/* values are now correlated to quantizer */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];
void vp9_init_me_luts() {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
        (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
  }
}
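// Worked example (hypothetical q value, purely illustrative): if
// vp9_convert_qindex_to_q(i) returned 200.0 for some high qindex, then
//   sad_per_bit16lut[i] = (int)(0.0418 * 200.0 + 2.4107) = (int)10.77 = 10
//   sad_per_bit4lut[i]  = (int)(0.063  * 200.0 + 2.742)  = (int)15.34 = 15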
int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  // TODO(debargha): Adjust the function below
  int rdmult = 88 * q * q / 25;
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      rdmult += (rdmult * rd_iifactor[31]) >> 4;
    else
      rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  return rdmult;
}
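// Worked example (hypothetical quantizer, purely illustrative): if
// vp9_dc_quant(qindex, 0) returned 40, the base multiplier would be
// 88 * 40 * 40 / 25 = 5632; in a two-pass inter frame with
// next_iiratio == 0 (rd_iifactor[0] == 4) this grows by a further
// (5632 * 4) >> 4 = 1408, giving a final rdmult of 7040.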
static int compute_rd_thresh_factor(int qindex) {
  int q;
  // TODO(debargha): Adjust the function below
  q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
  if (q < 8)
    q = 8;
  return q;
}
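// Worked example (same hypothetical quantizer as above): for a dc quant of
// 40, q = (int)(pow(40 / 4.0, 1.25) * 5.12) = (int)(17.78 * 5.12) = 91,
// comfortably above the floor of 8.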
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}
static void set_block_thresholds(VP9_COMP *cpi) {
  int i, bsize, segment_id;
  VP9_COMMON *cm = &cpi->common;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    int q;
    int segment_qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
    segment_qindex = clamp(segment_qindex + cm->y_dc_delta_q, 0, MAXQ);
    q = compute_rd_thresh_factor(segment_qindex);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Thresholds here seem unnecessarily harsh but are fine given the
      // actual range of values used for cpi->sf.thresh_mult[].
      int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);

      for (i = 0; i < MAX_MODES; ++i) {
        if (cpi->sf.thresh_mult[i] < thresh_max) {
          cpi->rd_threshes[segment_id][bsize][i] =
              cpi->sf.thresh_mult[i] * q *
              rd_thresh_block_size_factor[bsize] / 4;
        } else {
          cpi->rd_threshes[segment_id][bsize][i] = INT_MAX;
        }
      }

      for (i = 0; i < MAX_REFS; ++i) {
        if (cpi->sf.thresh_mult_sub8x8[i] < thresh_max) {
          cpi->rd_thresh_sub8x8[segment_id][bsize][i] =
              cpi->sf.thresh_mult_sub8x8[i] * q *
              rd_thresh_block_size_factor[bsize] / 4;
        } else {
          cpi->rd_thresh_sub8x8[segment_id][bsize][i] = INT_MAX;
        }
      }
    }
  }
}
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *cm = &cpi->common;
  int qindex, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(cm->base_qindex + cm->y_dc_delta_q, 0, MAXQ);

  cpi->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
  cpi->RDMULT = vp9_compute_rd_mult(cpi, qindex);

  cpi->mb.errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  cpi->mb.select_txfm_size =
      (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
       cm->frame_type != KEY_FRAME) ? 0 : 1;

  set_block_thresholds(cpi);

  fill_token_costs(cpi->mb.token_costs, cm->fc.coef_probs);

  for (i = 0; i < PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i], get_partition_probs(cm, i),
                    vp9_partition_tree);

  /* rough estimate for costing */
  vp9_init_mode_costs(cpi);

  if (!frame_is_intra_only(cm)) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cm->allow_high_precision_mv ? cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cm->fc.nmvc,
        cm->allow_high_precision_mv, 1, 1);

    for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
      MB_PREDICTION_MODE m;

      for (m = NEARESTMV; m < MB_MODE_COUNT; m++)
        cpi->mb.inter_mode_cost[i][INTER_OFFSET(m)] =
            cost_token(vp9_inter_mode_tree,
                       cm->fc.inter_mode_probs[i],
                       &vp9_inter_mode_encodings[INTER_OFFSET(m)]);
    }
  }
}
static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
                                       const double *tab1, const double *tab2,
                                       double *v1, double *v2) {
  double y = x * inv_step;
  int d = (int)y;
  if (d >= ntab - 1) {
    *v1 = tab1[ntab - 1];
    *v2 = tab2[ntab - 1];
  } else {
    double a = y - d;
    *v1 = tab1[d] * (1 - a) + tab1[d + 1] * a;
    *v2 = tab2[d] * (1 - a) + tab2[d + 1] * a;
  }
}
static void model_rd_norm(double x, double *R, double *D) {
  static const int inv_tab_step = 8;
  static const int tab_size = 120;
  // NOTE: The tables below must be of the same size.
  //
  // Normalized rate
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
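  // With inv_tab_step == 8 the tables below are sampled at x = d / 8 for
  // d = 0 .. tab_size - 1, i.e. they cover x up to (tab_size - 1) / 8
  // (about 14.9); larger inputs are clamped to the last entry by
  // linear_interpolate2().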
  static const double rate_tab[] = {
    64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
    2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
    1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
    0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
    0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
    0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
    0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
    0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
    0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
    0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
    0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
    0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
    0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
    0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
    0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
  };
  // Normalized distortion
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer with
  // given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const double dist_tab[] = {
    0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
    0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
    0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
    0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
    0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
    0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
    0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
    0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
    0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
    0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
    0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
    0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
    0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
    0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
    0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
  };
  /*
  assert(sizeof(rate_tab) == tab_size * sizeof(rate_tab[0]));
  assert(sizeof(dist_tab) == tab_size * sizeof(dist_tab[0]));
  assert(sizeof(rate_tab) == sizeof(dist_tab));
  */
  assert(x >= 0.0);
  linear_interpolate2(x, tab_size, inv_tab_step,
                      rate_tab, dist_tab, R, D);
}
static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  vp9_clear_system_state();
  if (var == 0 || n == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    double D, R;
    double s2 = (double)var / n;
    double x = qstep / sqrt(s2);
    model_rd_norm(x, &R, &D);
    *rate = (int)((n << 8) * R + 0.5);
    *dist = (int)(var * D + 0.5);
  }
  vp9_clear_system_state();
}
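// Worked example using the tables above (illustrative): var = 1024, n = 16,
// qstep = 8 gives s2 = 64 and x = 8 / sqrt(64) = 1.0, which lands exactly
// on table index 8 (no interpolation needed), so R = rate_tab[8] = 2.014
// and D = dist_tab[8] = 0.079; hence
//   *rate = (int)((16 << 8) * 2.014 + 0.5) = 8249
//   *dist = (int)(1024 * 0.079 + 0.5)      = 81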
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i, rate_sum = 0, dist_sum = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
    unsigned int sse;
    int rate;
    int64_t dist;
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, &sse);
    // sse works better than var, since there is no dc prediction used
    model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
                             pd->dequant[1] >> 3, &rate, &dist);

    rate_sum += rate;
    dist_sum += (int)dist;
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
  int j, k;
  BLOCK_SIZE bs;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int width = 4 << num_4x4_blocks_wide_lookup[bsize];
  const int height = 4 << num_4x4_blocks_high_lookup[bsize];
  int rate_sum = 0;
  int64_t dist_sum = 0;
  const int t = 4 << tx_size;

  if (tx_size == TX_4X4) {
    bs = BLOCK_4X4;
  } else if (tx_size == TX_8X8) {
    bs = BLOCK_8X8;
  } else if (tx_size == TX_16X16) {
    bs = BLOCK_16X16;
  } else if (tx_size == TX_32X32) {
    bs = BLOCK_32X32;
  } else {
    assert(0);
  }

  *out_skip = 1;
  for (j = 0; j < height; j += t) {
    for (k = 0; k < width; k += t) {
      int rate;
      int64_t dist;
      unsigned int sse;
      cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
                         &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
                         &sse);
      // sse works better than var, since there is no dc prediction used
      model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
      *out_skip &= (rate < 1024);
    }
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    int this_diff = coeff[i] - dqcoeff[i];
    error += (unsigned)this_diff * this_diff;
    sqcoeff += (unsigned)coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}
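/* Usage sketch (illustrative): vp9_block_error_c returns the sum of squared
 * differences between the original and dequantized coefficients and writes
 * the sum of squared original coefficients to *ssz. Callers such as
 * dist_block() use the return value as the coding distortion and *ssz as
 * the distortion of skipping the block entirely (all coefficients zeroed).
 */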
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
 * is non-zero). */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
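// Each row above sums (excluding the terminator) to the coefficient count
// of its transform size, e.g. TX_4X4: 1 + 2 + 3 + 4 + 3 + 3 = 16 and
// TX_8X8: 1 + 2 + 3 + 4 + 11 + 43 = 64.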
static INLINE int cost_coeffs(MACROBLOCK *x,
                              int plane, int block,
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
                              TX_SIZE tx_size,
                              const int16_t *scan, const int16_t *nb) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
  struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = pd->eobs[block];
  const int16_t *const qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block);
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
  unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
      x->token_costs[tx_size][type][ref];
  const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
  uint8_t *p_tok = x->token_cache;
  int pt = combine_entropy_contexts(above_ec, left_ec);
  int c, cost;

  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y_WITH_DC ? mbmi->tx_size == tx_size
                                      : get_uv_tx_size(mbmi) == tx_size);

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][DCT_EOB_TOKEN];
    c = 0;
  } else {
    int band_left = *band_count++;

    // dc token
    int v = qcoeff_ptr[0];
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
    p_tok[0] = vp9_pt_energy_class[prev_t];
    ++token_costs;

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
      int t;

      v = qcoeff_ptr[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
      pt = get_coef_context(nb, p_tok, c);
      cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
      p_tok[rc] = vp9_pt_energy_class[t];
      prev_t = t;
      if (!--band_left) {
        band_left = *band_count++;
        ++token_costs;
      }
    }

    // eob token
    if (band_left) {
      pt = get_coef_context(nb, p_tok, c);
      cost += (*token_costs)[0][pt][DCT_EOB_TOKEN];
    }
  }

  // was the eob the first coefficient?
  *A = *L = (c > 0);

  return cost;
}
static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) {
  const int ss_txfrm_size = tx_size << 1;
  struct rdcost_block_args* args = arg;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = args->tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
  args->sse = this_sse >> shift;

  if (x->skip_encode &&
      xd->mi_8x8[0]->mbmi.ref_frame[0] == INTRA_FRAME) {
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t p = (pd->dequant[1] * pd->dequant[1] *
                 (1 << ss_txfrm_size)) >> (shift + 2);
    args->dist += (p >> 4);
    args->sse += p;
  }
}
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
                       TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args* args = arg;

  int x_idx, y_idx;
  txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx);

  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
                           args->t_left + y_idx, args->tx_size,
                           args->scan, args->nb);
}
static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                           TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct encode_b_args encode_args = {x, NULL};
  int64_t rd1, rd2, rd;

  if (args->skip)
    return;

  if (!is_inter_block(&xd->mi_8x8[0]->mbmi))
    vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args);
  else
    vp9_xform_quant(plane, block, plane_bsize, tx_size, &encode_args);

  dist_block(plane, block, tx_size, args);
  rate_block(plane, block, plane_bsize, tx_size, args);
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = MIN(rd1, rd2);
  if (!xd->lossless && plane == 0)
    x->zcoeff_blk[tx_size][block] = rd1 > rd2 || !xd->plane[plane].eobs[block];

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse += args->sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->skip = 1;
    return;
  }
}
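// Note: rd1 above is the cost of actually coding this block's coefficients
// (rate plus reconstruction distortion), while rd2 is the cost of zeroing
// them out (no rate, distortion equal to the full SSE); zcoeff_blk records
// the blocks for which dropping the coefficients is the cheaper choice.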
void vp9_get_entropy_contexts(TX_SIZE tx_size,
    ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
    const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
    int num_4x4_w, int num_4x4_h) {
  int i;
  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = !!*(const uint16_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = !!*(const uint16_t *)&left[i];
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = !!*(const uint32_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = !!*(const uint32_t *)&left[i];
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = !!*(const uint64_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = !!*(const uint64_t *)&left[i];
      break;
    default:
      assert(!"Invalid transform size.");
  }
}
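// The multi-byte loads above (uint16_t/uint32_t/uint64_t) test several
// adjacent 4x4 contexts at once: e.g. for TX_8X8 the 16-bit read covers the
// two 4x4 entries spanned by one 8x8 transform block, and the !! collapses
// them into a single nonzero flag.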
static void init_rdcost_stack(MACROBLOCK *x, TX_SIZE tx_size,
                              const int num_4x4_w, const int num_4x4_h,
                              const int64_t ref_rdcost,
                              struct rdcost_block_args *arg) {
  vpx_memset(arg, 0, sizeof(struct rdcost_block_args));
  arg->x = x;
  arg->tx_size = tx_size;
  arg->bw = num_4x4_w;
  arg->bh = num_4x4_h;
  arg->best_rd = ref_rdcost;
}
static void txfm_rd_in_plane(MACROBLOCK *x,
                             struct rdcost_block_args *rd_stack,
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
                             BLOCK_SIZE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_h = num_4x4_blocks_high_lookup[bs];

  init_rdcost_stack(x, tx_size, num_4x4_w, num_4x4_h,
                    ref_best_rd, rd_stack);
  if (plane == 0)
    xd->mi_8x8[0]->mbmi.tx_size = tx_size;

  vp9_get_entropy_contexts(tx_size, rd_stack->t_above, rd_stack->t_left,
                           pd->above_context, pd->left_context,
                           num_4x4_w, num_4x4_h);

  get_scan(xd, tx_size, pd->plane_type, 0, &rd_stack->scan, &rd_stack->nb);

  foreach_transformed_block_in_plane(xd, bsize, plane,
                                     block_yrd_txfm, rd_stack);
  if (rd_stack->skip) {
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  } else {
    *distortion = rd_stack->this_dist;
    *rate = rd_stack->this_rate;
    *sse = rd_stack->this_sse;
    *skippable = vp9_is_skippable_in_plane(xd, bsize, plane);
  }
}
static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
                                     int64_t ref_best_rd,
                                     BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;

  mbmi->tx_size = MIN(max_tx_size, largest_tx_size);

  txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs,
                   mbmi->tx_size);
  cpi->tx_stepdown_count[0]++;
}
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int64_t *d, int64_t *distortion,
                                     int *s, int *skip,
                                     int64_t tx_cache[TX_MODES],
                                     BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
  int64_t rd[TX_SIZES][2];
  int n, m;
  int s0, s1;

  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    r[n][1] = r[n][0];
    if (r[n][0] == INT_MAX)
      continue;
    for (m = 0; m <= n - (n == max_tx_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
  }

  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
      continue;
    }
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }

  if (max_tx_size == TX_32X32 &&
      (cm->tx_mode == ALLOW_32X32 ||
       (cm->tx_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->tx_size = TX_32X32;
  } else if (max_tx_size >= TX_16X16 &&
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              (cm->tx_mode == TX_MODE_SELECT &&
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
    mbmi->tx_size = TX_16X16;
  } else if (cm->tx_mode == ALLOW_8X8 ||
             cm->tx_mode == ALLOW_16X16 ||
             cm->tx_mode == ALLOW_32X32 ||
             (cm->tx_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
    mbmi->tx_size = TX_8X8;
  } else {
    mbmi->tx_size = TX_4X4;
  }

  *distortion = d[mbmi->tx_size];
  *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip = s[mbmi->tx_size];

  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
  if (max_tx_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  else if (max_tx_size >= TX_16X16 &&
           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  else
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                               rd[TX_4X4][1] : rd[TX_8X8][1];

  if (max_tx_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] &&
      rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[0]++;
  } else if (max_tx_size >= TX_16X16 &&
             rd[TX_16X16][1] < rd[TX_8X8][1] &&
             rd[TX_16X16][1] < rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
  } else {
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
  }
}
static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
                                          int64_t ref_best_rd,
                                          BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
  int64_t rd[TX_SIZES][2];
  int n, m;
  int s0, s1;
  double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
  // double scale_r[TX_SIZES] = {2.82, 2.00, 1.41, 1.00};

  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);

  // for (n = TX_4X4; n <= max_txfm_size; n++)
  //   r[n][0] = (r[n][0] * scale_r[n]);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_tx_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
  }

  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }
  for (n = TX_4X4; n <= max_tx_size; n++) {
    rd[n][0] = (int64_t)(scale_rd[n] * rd[n][0]);
    rd[n][1] = (int64_t)(scale_rd[n] * rd[n][1]);
  }

  if (max_tx_size == TX_32X32 &&
      (cm->tx_mode == ALLOW_32X32 ||
       (cm->tx_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] <= rd[TX_16X16][1] &&
        rd[TX_32X32][1] <= rd[TX_8X8][1] &&
        rd[TX_32X32][1] <= rd[TX_4X4][1]))) {
    mbmi->tx_size = TX_32X32;
  } else if (max_tx_size >= TX_16X16 &&
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              (cm->tx_mode == TX_MODE_SELECT &&
               rd[TX_16X16][1] <= rd[TX_8X8][1] &&
               rd[TX_16X16][1] <= rd[TX_4X4][1]))) {
    mbmi->tx_size = TX_16X16;
  } else if (cm->tx_mode == ALLOW_8X8 ||
             cm->tx_mode == ALLOW_16X16 ||
             cm->tx_mode == ALLOW_32X32 ||
             (cm->tx_mode == TX_MODE_SELECT &&
              rd[TX_8X8][1] <= rd[TX_4X4][1])) {
    mbmi->tx_size = TX_8X8;
  } else {
    mbmi->tx_size = TX_4X4;
  }

  // Actually encode using the chosen mode if a model was used, but do not
  // update the r, d costs
  txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size);

  if (max_tx_size == TX_32X32 &&
      rd[TX_32X32][1] <= rd[TX_16X16][1] &&
      rd[TX_32X32][1] <= rd[TX_8X8][1] &&
      rd[TX_32X32][1] <= rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[0]++;
  } else if (max_tx_size >= TX_16X16 &&
             rd[TX_16X16][1] <= rd[TX_8X8][1] &&
             rd[TX_16X16][1] <= rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
  } else {
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
  }
}
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int64_t *distortion,
                            int *skip, int64_t *psse, BLOCK_SIZE bs,
                            int64_t txfm_cache[TX_MODES],
                            int64_t ref_best_rd) {
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  struct rdcost_block_args *rdcost_stack = &cpi->rdcost_stack;
  const int b_inter_mode = is_inter_block(mbmi);

  assert(bs == mbmi->sb_type);
  if (b_inter_mode)
    vp9_subtract_sby(x, bs);

  if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
      (cpi->sf.tx_size_search_method != USE_FULL_RD &&
       !b_inter_mode)) {
    vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
    choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
                             ref_best_rd, bs);
    if (psse)
      *psse = sse[mbmi->tx_size];
    return;
  }

  if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER &&
      b_inter_mode) {
    if (bs >= BLOCK_32X32)
      model_rd_for_sb_y_tx(cpi, bs, TX_32X32, x, xd,
                           &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]);
    if (bs >= BLOCK_16X16)
      model_rd_for_sb_y_tx(cpi, bs, TX_16X16, x, xd,
                           &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]);

    model_rd_for_sb_y_tx(cpi, bs, TX_8X8, x, xd,
                         &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]);

    model_rd_for_sb_y_tx(cpi, bs, TX_4X4, x, xd,
                         &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]);

    choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
                                  skip, sse, ref_best_rd, bs);
  } else {
    if (bs >= BLOCK_32X32)
      txfm_rd_in_plane(x, rdcost_stack, &r[TX_32X32][0], &d[TX_32X32],
                       &s[TX_32X32], &sse[TX_32X32],
                       ref_best_rd, 0, bs, TX_32X32);
    if (bs >= BLOCK_16X16)
      txfm_rd_in_plane(x, rdcost_stack, &r[TX_16X16][0], &d[TX_16X16],
                       &s[TX_16X16], &sse[TX_16X16],
                       ref_best_rd, 0, bs, TX_16X16);
    txfm_rd_in_plane(x, rdcost_stack, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8],
                     &sse[TX_8X8], ref_best_rd, 0, bs, TX_8X8);
    txfm_rd_in_plane(x, rdcost_stack, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4],
                     &sse[TX_4X4], ref_best_rd, 0, bs, TX_4X4);
    choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
                             skip, txfm_cache, bs);
  }
  if (psse)
    *psse = sse[mbmi->tx_size];
}
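// Heuristic used when FLAG_SKIP_INTRA_DIRMISMATCH is set: a diagonal
// prediction mode is only worth searching if the best mode found so far is
// one of its angular neighbours, e.g. D117 (between vertical and D135) is
// skipped unless the current best is V_PRED or D135_PRED.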
static int conditional_skipintra(MB_PREDICTION_MODE mode,
                                 MB_PREDICTION_MODE best_intra_mode) {
  if (mode == D117_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  if (mode == D63_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D207_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D153_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  return 0;
}
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     MB_PREDICTION_MODE *best_mode,
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int64_t *bestdistortion,
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
  MB_PREDICTION_MODE mode;
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t best_rd = rd_thresh;
  int rate = 0;
  int64_t distortion;
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  uint8_t *src_init = raster_block_offset_uint8(BLOCK_8X8, ib,
                                                p->src.buf, src_stride);
  uint8_t *dst_init = raster_block_offset_uint8(BLOCK_8X8, ib,
                                                pd->dst.buf, dst_stride);
  int16_t *src_diff, *coeff;

  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];

  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  uint8_t best_dst[8 * 8];

  assert(ib < 4);

  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
  xd->mi_8x8[0]->mbmi.tx_size = TX_4X4;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;

    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
      continue;

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
        continue;
    }

    rate = bmode_costs[mode];
    distortion = 0;

    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
        int64_t ssz;
        const int16_t *scan;
        const int16_t *nb;
        uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride;
        uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride;
        const int block = ib + idy * 2 + idx;
        TX_TYPE tx_type;
        xd->mi_8x8[0]->bmi[block].as_mode = mode;
        src_diff = raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
        coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
        vp9_predict_intra_block(xd, block, 1,
                                TX_4X4, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride);
        vp9_subtract_block(4, 4, src_diff, 8,
                           src, src_stride,
                           dst, dst_stride);

        tx_type = get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block);
        get_scan_nb_4x4(tx_type, &scan, &nb);

        if (tx_type != DCT_DCT)
          vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
        else
          x->fwd_txm4x4(src_diff, coeff, 8);

        vp9_regular_quantize_b_4x4(x, 4, block, scan, get_iscan_4x4(tx_type));

        ratey += cost_coeffs(x, 0, block,
                             tempa + idx, templ + idy, TX_4X4, scan, nb);
        distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                      16, &ssz) >> 2;
        if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
          goto next;

        if (tx_type != DCT_DCT)
          vp9_iht4x4_16_add(BLOCK_OFFSET(pd->dqcoeff, block),
                            dst, pd->dst.stride, tx_type);
        else
          xd->itxm_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, pd->dst.stride,
                       16);
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
                   num_4x4_blocks_wide * 4);
    }
  next:
    {}
  }

  if (best_rd >= rd_thresh || x->skip_encode)
    return best_rd;

  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
    vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
               num_4x4_blocks_wide * 4);

  return best_rd;
}
static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi,
                                            MACROBLOCK * const mb,
                                            int * const rate,
                                            int * const rate_y,
                                            int64_t * const distortion,
                                            int64_t best_rd) {
  int i, j;
  MACROBLOCKD *const xd = &mb->e_mbd;
  MODE_INFO *const mic = xd->mi_8x8[0];
  const MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
  const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL;
  const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  int cost = 0;
  int64_t total_distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  ENTROPY_CONTEXT t_above[4], t_left[4];
  int *bmode_costs;

  vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));

  bmode_costs = mb->mbmode_cost;

  // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      MB_PREDICTION_MODE best_mode = DC_PRED;
      int r = INT_MAX, ry = INT_MAX;
      int64_t d = INT64_MAX, this_rd = INT64_MAX;
      i = idy * 2 + idx;
      if (cpi->common.frame_type == KEY_FRAME) {
        const MB_PREDICTION_MODE A = above_block_mode(mic, above_mi, i);
        const MB_PREDICTION_MODE L = left_block_mode(mic, left_mi, i);

        bmode_costs = mb->y_mode_costs[A][L];
      }

      this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
                                      t_above + idx, t_left + idy, &r, &ry, &d,
                                      bsize, best_rd - total_rd);
      if (this_rd >= best_rd - total_rd)
        return INT64_MAX;

      total_rd += this_rd;
      cost += r;
      total_distortion += d;
      tot_rate_y += ry;

      mic->bmi[i].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_high; ++j)
        mic->bmi[i + j * 2].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_wide; ++j)
        mic->bmi[i + j].as_mode = best_mode;

      if (total_rd >= best_rd)
        return INT64_MAX;
    }
  }

  *rate = cost;
  *rate_y = tot_rate_y;
  *distortion = total_distortion;
  mic->mbmi.mode = mic->bmi[3].as_mode;

  return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}
static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int64_t *distortion, int *skippable,
                                      BLOCK_SIZE bsize,
                                      int64_t tx_cache[TX_MODES],
                                      int64_t best_rd) {
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE mode_selected = DC_PRED;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi_8x8[0];
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion, this_rd;
  TX_SIZE best_tx = TX_4X4;
  int i;
  int *bmode_costs = x->mbmode_cost;

  if (cpi->sf.tx_size_search_method == USE_FULL_RD)
    for (i = 0; i < TX_MODES; i++)
      tx_cache[i] = INT64_MAX;

  /* Y Search for intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    int64_t local_tx_cache[TX_MODES];
    MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
    MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL;

    if (!(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]] & (1 << mode)))
      continue;

    if (cpi->common.frame_type == KEY_FRAME) {
      const MB_PREDICTION_MODE A = above_block_mode(mic, above_mi, 0);
      const MB_PREDICTION_MODE L = left_block_mode(mic, left_mi, 0);

      bmode_costs = x->y_mode_costs[A][L];
    }
    mic->mbmi.mode = mode;

    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
                    bsize, local_tx_cache, best_rd);

    if (this_rate_tokenonly == INT_MAX)
      continue;

    this_rate = this_rate_tokenonly + bmode_costs[mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      best_tx = mic->mbmi.tx_size;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
    }

    if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
      for (i = 0; i < TX_MODES && local_tx_cache[i] < INT64_MAX; i++) {
        const int64_t adj_rd = this_rd + local_tx_cache[i] -
                               local_tx_cache[cpi->common.tx_mode];
        if (adj_rd < tx_cache[i]) {
          tx_cache[i] = adj_rd;
        }
      }
    }
  }

  mic->mbmi.mode = mode_selected;
  mic->mbmi.tx_size = best_tx;

  return best_rd;
}
static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x,
                             int *rate, int64_t *distortion, int *skippable,
                             int64_t *sse, BLOCK_SIZE bsize,
                             int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
  int plane;
  int pnrate = 0, pnskip = 1;
  int64_t pndist = 0, pnsse = 0;

  if (ref_best_rd < 0)
    goto term;

  if (is_inter_block(mbmi))
    vp9_subtract_sbuv(x, bsize);

  *rate = 0;
  *distortion = 0;
  *sse = 0;
  *skippable = 1;

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    txfm_rd_in_plane(x, &cpi->rdcost_stack, &pnrate, &pndist, &pnskip, &pnsse,
                     ref_best_rd, plane, bsize, uv_txfm_size);
    if (pnrate == INT_MAX)
      goto term;
    *rate += pnrate;
    *distortion += pndist;
    *sse += pnsse;
    *skippable &= pnskip;
  }
  return;

 term:
  *rate = INT_MAX;
  *distortion = INT64_MAX;
  *sse = INT64_MAX;
  *skippable = 0;
  return;
}
static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       int *rate, int *rate_tokenonly,
                                       int64_t *distortion, int *skippable,
                                       BLOCK_SIZE bsize) {
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE mode_selected = DC_PRED;
  int64_t best_rd = INT64_MAX, this_rd;
  int this_rate_tokenonly, this_rate, s;
  int64_t this_distortion, this_sse;

  // int mode_mask = (bsize <= BLOCK_8X8)
  //                 ? ALL_INTRA_MODES : cpi->sf.intra_uv_mode_mask;

  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    // if (!(mode_mask & (1 << mode)))
    if (!(cpi->sf.intra_uv_mode_mask[max_uv_txsize_lookup[bsize]]
          & (1 << mode)))
      continue;

    x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode;

    super_block_uvrd(cpi, x, &this_rate_tokenonly,
                     &this_distortion, &s, &this_sse, bsize, best_rd);
    if (this_rate_tokenonly == INT_MAX)
      continue;
    this_rate = this_rate_tokenonly +
                x->intra_uv_mode_cost[cpi->common.frame_type][mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
      if (!x->select_txfm_size) {
        int i;
        struct macroblock_plane *const p = x->plane;
        struct macroblockd_plane *const pd = x->e_mbd.plane;
        for (i = 1; i < MAX_MB_PLANE; ++i) {
          p[i].coeff = ctx->coeff_pbuf[i][2];
          pd[i].qcoeff = ctx->qcoeff_pbuf[i][2];
          pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
          pd[i].eobs = ctx->eobs_pbuf[i][2];

          ctx->coeff_pbuf[i][2] = ctx->coeff_pbuf[i][0];
          ctx->qcoeff_pbuf[i][2] = ctx->qcoeff_pbuf[i][0];
          ctx->dqcoeff_pbuf[i][2] = ctx->dqcoeff_pbuf[i][0];
          ctx->eobs_pbuf[i][2] = ctx->eobs_pbuf[i][0];

          ctx->coeff_pbuf[i][0] = p[i].coeff;
          ctx->qcoeff_pbuf[i][0] = pd[i].qcoeff;
          ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
          ctx->eobs_pbuf[i][0] = pd[i].eobs;
        }
      }
    }
  }

  x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode_selected;

  return best_rd;
}
static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x,
                              int *rate, int *rate_tokenonly,
                              int64_t *distortion, int *skippable,
                              BLOCK_SIZE bsize) {
  int64_t this_rd;
  int64_t this_sse;

  x->e_mbd.mi_8x8[0]->mbmi.uv_mode = DC_PRED;
  super_block_uvrd(cpi, x, rate_tokenonly, distortion,
                   skippable, &this_sse, bsize, INT64_MAX);
  *rate = *rate_tokenonly +
          x->intra_uv_mode_cost[cpi->common.frame_type][DC_PRED];
  this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion);

  return this_rd;
}
static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
                                 BLOCK_SIZE bsize, int *rate_uv,
                                 int *rate_uv_tokenonly,
                                 int64_t *dist_uv, int *skip_uv,
                                 MB_PREDICTION_MODE *mode_uv) {
  MACROBLOCK *const x = &cpi->mb;

  // Use an estimated rd for uv_intra based on DC_PRED if the
  // appropriate speed flag is set.
  if (cpi->sf.use_uv_intra_rd_estimate) {
    rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                   bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
  // Else do a proper rd search for each possible transform size that may
  // be considered in the main rd loop.
  } else {
    rd_pick_intra_sbuv_mode(cpi, x, ctx,
                            rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                            bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
  }
  *mode_uv = x->e_mbd.mi_8x8[0]->mbmi.uv_mode;
}
static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
                       int mode_context) {
  MACROBLOCK *const x = &cpi->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int segment_id = xd->mi_8x8[0]->mbmi.segment_id;

  // Don't account for mode here if segment skip is enabled.
  if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
    assert(is_inter_mode(mode));
    return x->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
  } else {
    return 0;
  }
}
void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
  x->e_mbd.mi_8x8[0]->mbmi.mode = mb;
  x->e_mbd.mi_8x8[0]->mbmi.mv[0].as_int = mv->as_int;
}
static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                BLOCK_SIZE bsize,
                                int_mv *frame_mv,
                                int mi_row, int mi_col,
                                int_mv single_newmv[MAX_REF_FRAMES],
                                int *rate_mv);
static int labels2mode(MACROBLOCK *x, int i,
                       MB_PREDICTION_MODE this_mode,
                       int_mv *this_mv, int_mv *this_second_mv,
                       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                       int_mv seg_mvs[MAX_REF_FRAMES],
                       int_mv *best_ref_mv,
                       int_mv *second_best_ref_mv,
                       int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi_8x8[0];
  MB_MODE_INFO *mbmi = &mic->mbmi;
  int cost = 0, thismvcost = 0;
  int idx, idy;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
  const int has_second_rf = has_second_ref(mbmi);

  /* We have to be careful retrieving previously-encoded motion vectors.
     Ones from this macroblock have to be pulled from the BLOCKD array
     as they have not yet made it to the bmi array in our MB_MODE_INFO. */
  MB_PREDICTION_MODE m;

  // The only time we should do costing for a new motion vector or mode
  // is when we are on a new label (jbb May 08, 2007).
  switch (m = this_mode) {
    case NEWMV:
      this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
      thismvcost = vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv,
                                   mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      if (has_second_rf) {
        this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
        thismvcost += vp9_mv_bit_cost(&this_second_mv->as_mv,
                                      &second_best_ref_mv->as_mv,
                                      mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      }
      break;
    case NEARESTMV:
      this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
      if (has_second_rf)
        this_second_mv->as_int =
            frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
      break;
    case NEARMV:
      this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
      if (has_second_rf)
        this_second_mv->as_int =
            frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
      break;
    case ZEROMV:
      this_mv->as_int = 0;
      if (has_second_rf)
        this_second_mv->as_int = 0;
      break;
    default:
      break;
  }

  cost = cost_mv_ref(cpi, this_mode,
                     mbmi->mode_context[mbmi->ref_frame[0]]);

  mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
  if (has_second_rf)
    mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;

  mic->bmi[i].as_mode = m;

  for (idy = 0; idy < num_4x4_blocks_high; ++idy)
    for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
      vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
                 &mic->bmi[i], sizeof(mic->bmi[i]));

  cost += thismvcost;
  return cost;
}
static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
                                       MACROBLOCK *x,
                                       int64_t best_yrd,
                                       int i,
                                       int *labelyrate,
                                       int64_t *distortion, int64_t *sse,
                                       ENTROPY_CONTEXT *ta,
                                       ENTROPY_CONTEXT *tl) {
  int k;
  MACROBLOCKD *xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  struct macroblock_plane *const p = &x->plane[0];
  MODE_INFO *const mi = xd->mi_8x8[0];
  const BLOCK_SIZE bsize = mi->mbmi.sb_type;
  const int width = plane_block_width(bsize, pd);
  const int height = plane_block_height(bsize, pd);
  int idx, idy;

  uint8_t *const src = raster_block_offset_uint8(BLOCK_8X8, i,
                                                 p->src.buf, p->src.stride);
  uint8_t *const dst = raster_block_offset_uint8(BLOCK_8X8, i,
                                                 pd->dst.buf, pd->dst.stride);
  int64_t thisdistortion = 0, thissse = 0;
  int thisrate = 0, ref;
  const int is_compound = has_second_ref(&mi->mbmi);
  for (ref = 0; ref < 1 + is_compound; ++ref) {
    const uint8_t *pre = raster_block_offset_uint8(BLOCK_8X8, i,
                             pd->pre[ref].buf, pd->pre[ref].stride);
    vp9_build_inter_predictor(pre, pd->pre[ref].stride,
                              dst, pd->dst.stride,
                              &mi->bmi[i].as_mv[ref].as_mv,
                              &xd->scale_factor[ref],
                              width, height, ref, &xd->subpix, MV_PRECISION_Q3);
  }

  vp9_subtract_block(height, width,
                     raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
                     src, p->src.stride,
                     dst, pd->dst.stride);

  k = i;
  for (idy = 0; idy < height / 4; ++idy) {
    for (idx = 0; idx < width / 4; ++idx) {
      int64_t ssz, rd, rd1, rd2;
      int16_t* coeff;

      k += (idy * 2 + idx);
      coeff = BLOCK_OFFSET(p->coeff, k);
      x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
                    coeff, 8);
      vp9_regular_quantize_b_4x4(x, 4, k, get_scan_4x4(DCT_DCT),
                                 get_iscan_4x4(DCT_DCT));
      thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                        16, &ssz);
      thissse += ssz;
      thisrate += cost_coeffs(x, 0, k,
                              ta + (k & 1),
                              tl + (k >> 1), TX_4X4,
                              vp9_default_scan_4x4,
                              vp9_default_scan_4x4_neighbors);
      rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
      rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
      rd = MIN(rd1, rd2);
      if (rd >= best_yrd)
        return INT64_MAX;
    }
  }

  *distortion = thisdistortion >> 2;
  *labelyrate = thisrate;
  *sse = thissse >> 2;

  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}
typedef struct {
  int eobs;
  int brate;
  int byrate;
  int64_t bdist;
  int64_t bsse;
  int64_t brdcost;
  int_mv mvs[2];
  ENTROPY_CONTEXT ta[2];
  ENTROPY_CONTEXT tl[2];
} SEG_RDSTAT;

typedef struct {
  int_mv *ref_mv, *second_ref_mv;
  int_mv mvp;

  int64_t segment_rd;
  int r;
  int64_t d;
  int64_t sse;
  int segment_yrate;
  MB_PREDICTION_MODE modes[4];
  SEG_RDSTAT rdstat[4][INTER_MODES];
  int mvthresh;
} BEST_SEG_INFO;
static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
  int r = 0;
  r |= (mv->as_mv.row >> 3) < x->mv_row_min;
  r |= (mv->as_mv.row >> 3) > x->mv_row_max;
  r |= (mv->as_mv.col >> 3) < x->mv_col_min;
  r |= (mv->as_mv.col >> 3) > x->mv_col_max;
  return r;
}
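// Note: motion vectors are stored in 1/8-pel units, so the >> 3 above
// converts to full-pel positions before comparing against the search bounds.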
static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
  MB_MODE_INFO *const mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];

  p->src.buf = raster_block_offset_uint8(BLOCK_8X8, i, p->src.buf,
                                         p->src.stride);
  assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
  pd->pre[0].buf = raster_block_offset_uint8(BLOCK_8X8, i, pd->pre[0].buf,
                                             pd->pre[0].stride);
  if (has_second_ref(mbmi))
    pd->pre[1].buf = raster_block_offset_uint8(BLOCK_8X8, i, pd->pre[1].buf,
                                               pd->pre[1].stride);
}
static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
                                  struct buf_2d orig_pre[2]) {
  MB_MODE_INFO *mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
  x->plane[0].src = orig_src;
  x->e_mbd.plane[0].pre[0] = orig_pre[0];
  if (has_second_ref(mbmi))
    x->e_mbd.plane[0].pre[1] = orig_pre[1];
}
static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
                                    const TileInfo *const tile,
                                    BEST_SEG_INFO *bsi_buf, int filter_idx,
                                    int_mv seg_mvs[4][MAX_REF_FRAMES],
                                    int mi_row, int mi_col) {
  int i, br = 0, idx, idy;
  int64_t bd = 0, block_sse = 0;
  MB_PREDICTION_MODE this_mode;
  MODE_INFO *mi = x->e_mbd.mi_8x8[0];
  MB_MODE_INFO *const mbmi = &mi->mbmi;
  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
  const int label_count = 4;
  int64_t this_segment_rd = 0;
  int label_mv_thresh;
  int segmentyrate = 0;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  vp9_variance_fn_ptr_t *v_fn_ptr;
  ENTROPY_CONTEXT t_above[2], t_left[2];
  BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
  int mode_idx;
  int subpelmv = 1, have_ref = 0;
  const int has_second_rf = has_second_ref(mbmi);

  vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
  vpx_memcpy(t_left, pd->left_context, sizeof(t_left));

  v_fn_ptr = &cpi->fn_ptr[bsize];

  // A factor of 64 makes this threshold effectively very large, so that we
  // very rarely check mvs on segments; setting it to 1 would make the mv
  // threshold roughly equal to what it is for macroblocks.
  label_mv_thresh = 1 * bsi->mvthresh / label_count;
1709 // Segmentation method overheads
1710 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1711 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1712 // TODO(jingning,rbultje): rewrite the rate-distortion optimization
1713 // loop for 4x4/4x8/8x4 block coding; to be replaced with a new rd loop.
1714 int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
1715 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1716 MB_PREDICTION_MODE mode_selected = ZEROMV;
1717 int64_t best_rd = INT64_MAX;
1718 i = idy * 2 + idx;
1720 frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0;
1721 vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, tile,
1722 &frame_mv[NEARESTMV][mbmi->ref_frame[0]],
1723 &frame_mv[NEARMV][mbmi->ref_frame[0]],
1724 i, 0, mi_row, mi_col);
1725 if (has_second_rf) {
1726 frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0;
1727 vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, tile,
1728 &frame_mv[NEARESTMV][mbmi->ref_frame[1]],
1729 &frame_mv[NEARMV][mbmi->ref_frame[1]],
1730 i, 1, mi_row, mi_col);
1731 }
1732 // search for the best motion vector on this segment
1733 for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1734 const struct buf_2d orig_src = x->plane[0].src;
1735 struct buf_2d orig_pre[2];
1737 mode_idx = INTER_OFFSET(this_mode);
1738 bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
1740 // NEAR/NEAREST with a (0,0) mv duplicates ZEROMV; skip this mode if it costs more to signal than the duplicate.
1741 if ((this_mode == NEARMV || this_mode == NEARESTMV ||
1742 this_mode == ZEROMV) &&
1743 frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 &&
1744 (!has_second_rf ||
1745 frame_mv[this_mode][mbmi->ref_frame[1]].as_int == 0)) {
1746 int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
1747 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
1748 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
1749 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
1751 if (this_mode == NEARMV) {
1752 if (c1 > c3)
1753 continue;
1754 } else if (this_mode == NEARESTMV) {
1755 if (c2 > c3)
1756 continue;
1757 } else {
1758 assert(this_mode == ZEROMV);
1759 if (!has_second_rf) {
1760 if ((c3 >= c2 &&
1761 frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
1762 (c3 >= c1 &&
1763 frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
1764 continue;
1765 } else {
1766 if ((c3 >= c2 &&
1767 frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
1768 frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
1769 (c3 >= c1 &&
1770 frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
1771 frame_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
1772 continue;
1773 }
1774 }
1775 }
1777 vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
1778 vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
1779 sizeof(bsi->rdstat[i][mode_idx].ta));
1780 vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
1781 sizeof(bsi->rdstat[i][mode_idx].tl));
1783 // motion search for newmv (single predictor case only)
1784 if (!has_second_rf && this_mode == NEWMV &&
1785 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
1786 int step_param = 0;
1787 int further_steps;
1788 int thissme, bestsme = INT_MAX;
1789 int sadpb = x->sadperbit4;
1790 int_mv mvp_full;
1791 int max_mv;
1793 /* Is the best so far sufficiently good that we can't justify doing
1794 * a new motion search? */
1795 if (best_rd < label_mv_thresh)
1796 break;
1798 if (cpi->compressor_speed) {
1799 // use previous block's result as next block's MV predictor.
1800 if (i > 0) {
1801 bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
1802 if (i == 2)
1803 bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
1804 }
1805 }
1806 if (i == 0)
1807 max_mv = x->max_mv_context[mbmi->ref_frame[0]];
1808 else
1809 max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
1811 if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
1812 // Take a weighted average of the step_params based on the last
1813 // frame's max mv magnitude and the best ref mvs of the current
1814 // block for the given reference.
1815 step_param = (vp9_init_search_range(cpi, max_mv) +
1816 cpi->mv_step_param) >> 1;
1817 } else {
1818 step_param = cpi->mv_step_param;
1819 }
1821 mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
1822 mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
1824 if (cpi->sf.adaptive_motion_search && cpi->common.show_frame) {
1825 mvp_full.as_mv.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3;
1826 mvp_full.as_mv.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3;
1827 step_param = MAX(step_param, 8);
1828 }
1830 further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
1831 // adjust src pointer for this block
1832 mi_buf_shift(x, i);
1833 if (cpi->sf.search_method == HEX) {
1834 bestsme = vp9_hex_search(x, &mvp_full.as_mv,
1835 step_param,
1836 sadpb, 1, v_fn_ptr, 1,
1837 &bsi->ref_mv->as_mv,
1838 &mode_mv[NEWMV].as_mv);
1839 } else if (cpi->sf.search_method == SQUARE) {
1840 bestsme = vp9_square_search(x, &mvp_full.as_mv,
1841 step_param,
1842 sadpb, 1, v_fn_ptr, 1,
1843 &bsi->ref_mv->as_mv,
1844 &mode_mv[NEWMV].as_mv);
1845 } else if (cpi->sf.search_method == BIGDIA) {
1846 bestsme = vp9_bigdia_search(x, &mvp_full.as_mv,
1847 step_param,
1848 sadpb, 1, v_fn_ptr, 1,
1849 &bsi->ref_mv->as_mv,
1850 &mode_mv[NEWMV].as_mv);
1851 } else {
1852 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
1853 sadpb, further_steps, 0, v_fn_ptr,
1854 bsi->ref_mv, &mode_mv[NEWMV]);
1855 }
1857 // Should we do a full search (best quality only)?
1858 if (cpi->compressor_speed == 0) {
1859 /* Check if mvp_full is within the range. */
1860 clamp_mv(&mvp_full.as_mv, x->mv_col_min, x->mv_col_max,
1861 x->mv_row_min, x->mv_row_max);
1863 thissme = cpi->full_search_sad(x, &mvp_full,
1864 sadpb, 16, v_fn_ptr,
1865 x->nmvjointcost, x->mvcost,
1866 bsi->ref_mv, i);
1868 if (thissme < bestsme) {
1869 bestsme = thissme;
1870 mode_mv[NEWMV].as_int = mi->bmi[i].as_mv[0].as_int;
1871 } else {
1872 /* The full search result is actually worse, so reinstate the
1873 * previous best vector. */
1874 mi->bmi[i].as_mv[0].as_int = mode_mv[NEWMV].as_int;
1875 }
1876 }
1878 if (bestsme < INT_MAX) {
1879 int distortion;
1880 unsigned int sse;
1881 cpi->find_fractional_mv_step(x,
1882 &mode_mv[NEWMV].as_mv,
1883 &bsi->ref_mv->as_mv,
1884 cpi->common.allow_high_precision_mv,
1885 x->errorperbit, v_fn_ptr,
1886 0, cpi->sf.subpel_iters_per_step,
1887 x->nmvjointcost, x->mvcost,
1888 &distortion, &sse);
1890 // save motion search result for use in compound prediction
1891 seg_mvs[i][mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int;
1892 }
1894 if (cpi->sf.adaptive_motion_search)
1895 x->pred_mv[mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int;
1897 // restore src pointers
1898 mi_buf_restore(x, orig_src, orig_pre);
1899 }
1901 if (has_second_rf) {
1902 if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
1903 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
1904 continue;
1905 }
1907 if (has_second_rf && this_mode == NEWMV &&
1908 mbmi->interp_filter == EIGHTTAP) {
1909 // adjust src pointers
1910 mi_buf_shift(x, i);
1911 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
1912 int rate_mv;
1913 joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
1914 mi_row, mi_col, seg_mvs[i],
1915 &rate_mv);
1916 seg_mvs[i][mbmi->ref_frame[0]].as_int =
1917 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
1918 seg_mvs[i][mbmi->ref_frame[1]].as_int =
1919 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
1920 }
1921 // restore src pointers
1922 mi_buf_restore(x, orig_src, orig_pre);
1923 }
1925 bsi->rdstat[i][mode_idx].brate =
1926 labels2mode(x, i, this_mode, &mode_mv[this_mode],
1927 &second_mode_mv[this_mode], frame_mv, seg_mvs[i],
1928 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
1929 x->mvcost, cpi);
1932 bsi->rdstat[i][mode_idx].mvs[0].as_int = mode_mv[this_mode].as_int;
1933 if (num_4x4_blocks_wide > 1)
1934 bsi->rdstat[i + 1][mode_idx].mvs[0].as_int =
1935 mode_mv[this_mode].as_int;
1936 if (num_4x4_blocks_high > 1)
1937 bsi->rdstat[i + 2][mode_idx].mvs[0].as_int =
1938 mode_mv[this_mode].as_int;
1939 if (has_second_rf) {
1940 bsi->rdstat[i][mode_idx].mvs[1].as_int =
1941 second_mode_mv[this_mode].as_int;
1942 if (num_4x4_blocks_wide > 1)
1943 bsi->rdstat[i + 1][mode_idx].mvs[1].as_int =
1944 second_mode_mv[this_mode].as_int;
1945 if (num_4x4_blocks_high > 1)
1946 bsi->rdstat[i + 2][mode_idx].mvs[1].as_int =
1947 second_mode_mv[this_mode].as_int;
1948 }
1950 // Trap vectors that reach beyond the UMV borders
1951 if (mv_check_bounds(x, &mode_mv[this_mode]))
1952 continue;
1953 if (has_second_rf &&
1954 mv_check_bounds(x, &second_mode_mv[this_mode]))
1955 continue;
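// For the second and third filter passes, a full-pel mv that matches an
// earlier pass's result yields an identical prediction (interpolation
// filters only differ at sub-pel positions), so the cached rd stats can be
// copied instead of re-encoding the block.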
1957 if (filter_idx > 0) {
1958 BEST_SEG_INFO *ref_bsi = bsi_buf;
1959 subpelmv = (mode_mv[this_mode].as_mv.row & 0x0f) ||
1960 (mode_mv[this_mode].as_mv.col & 0x0f);
1961 have_ref = mode_mv[this_mode].as_int ==
1962 ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
1963 if (has_second_rf) {
1964 subpelmv |= (second_mode_mv[this_mode].as_mv.row & 0x0f) ||
1965 (second_mode_mv[this_mode].as_mv.col & 0x0f);
1966 have_ref &= second_mode_mv[this_mode].as_int ==
1967 ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
1968 }
1970 if (filter_idx > 1 && !subpelmv && !have_ref) {
1971 ref_bsi = bsi_buf + 1;
1972 have_ref = mode_mv[this_mode].as_int ==
1973 ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
1974 if (has_second_rf) {
1975 have_ref &= second_mode_mv[this_mode].as_int ==
1976 ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
1977 }
1978 }
1980 if (!subpelmv && have_ref &&
1981 ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
1982 vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
1983 sizeof(SEG_RDSTAT));
1984 if (num_4x4_blocks_wide > 1)
1985 bsi->rdstat[i + 1][mode_idx].eobs =
1986 ref_bsi->rdstat[i + 1][mode_idx].eobs;
1987 if (num_4x4_blocks_high > 1)
1988 bsi->rdstat[i + 2][mode_idx].eobs =
1989 ref_bsi->rdstat[i + 2][mode_idx].eobs;
1991 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
1992 mode_selected = this_mode;
1993 best_rd = bsi->rdstat[i][mode_idx].brdcost;
1994 }
1995 continue;
1996 }
1997 }
1999 bsi->rdstat[i][mode_idx].brdcost =
2000 encode_inter_mb_segment(cpi, x,
2001 bsi->segment_rd - this_segment_rd, i,
2002 &bsi->rdstat[i][mode_idx].byrate,
2003 &bsi->rdstat[i][mode_idx].bdist,
2004 &bsi->rdstat[i][mode_idx].bsse,
2005 bsi->rdstat[i][mode_idx].ta,
2006 bsi->rdstat[i][mode_idx].tl);
2007 if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
2008 bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
2009 bsi->rdstat[i][mode_idx].brate, 0);
2010 bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
2011 bsi->rdstat[i][mode_idx].eobs = pd->eobs[i];
2012 if (num_4x4_blocks_wide > 1)
2013 bsi->rdstat[i + 1][mode_idx].eobs = pd->eobs[i + 1];
2014 if (num_4x4_blocks_high > 1)
2015 bsi->rdstat[i + 2][mode_idx].eobs = pd->eobs[i + 2];
2016 }
2018 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2019 mode_selected = this_mode;
2020 best_rd = bsi->rdstat[i][mode_idx].brdcost;
2021 }
2022 } /*for each 4x4 mode*/
2024 if (best_rd == INT64_MAX) {
2025 int iy, midx;
2026 for (iy = i + 1; iy < 4; ++iy)
2027 for (midx = 0; midx < INTER_MODES; ++midx)
2028 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2029 bsi->segment_rd = INT64_MAX;
2030 return;
2031 }
2033 mode_idx = INTER_OFFSET(mode_selected);
2034 vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
2035 vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
2037 labels2mode(x, i, mode_selected, &mode_mv[mode_selected],
2038 &second_mode_mv[mode_selected], frame_mv, seg_mvs[i],
2039 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
2040 x->mvcost, cpi);
2042 br += bsi->rdstat[i][mode_idx].brate;
2043 bd += bsi->rdstat[i][mode_idx].bdist;
2044 block_sse += bsi->rdstat[i][mode_idx].bsse;
2045 segmentyrate += bsi->rdstat[i][mode_idx].byrate;
2046 this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
2048 if (this_segment_rd > bsi->segment_rd) {
2049 int iy, midx;
2050 for (iy = i + 1; iy < 4; ++iy)
2051 for (midx = 0; midx < INTER_MODES; ++midx)
2052 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2053 bsi->segment_rd = INT64_MAX;
2054 return;
2055 }
2056 }
2057 } /* for each label */
2059 bsi->r = br;
2060 bsi->d = bd;
2061 bsi->segment_yrate = segmentyrate;
2062 bsi->segment_rd = this_segment_rd;
2063 bsi->sse = block_sse;
2065 // update the coding decisions
2066 for (i = 0; i < 4; ++i)
2067 bsi->modes[i] = mi->bmi[i].as_mode;
2068 }
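// Driver for the sub-8x8 mode search: seeds BEST_SEG_INFO, runs
// rd_check_segment_txsize(), and on success commits the winning per-label
// mvs, modes and eobs back into the block's mode info.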
2070 static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
2071 const TileInfo *const tile,
2072 int_mv *best_ref_mv,
2073 int_mv *second_best_ref_mv,
2074 int64_t best_rd,
2075 int *returntotrate,
2076 int *returnyrate,
2077 int64_t *returndistortion,
2078 int *skippable, int64_t *psse,
2079 int mvthresh,
2080 int_mv seg_mvs[4][MAX_REF_FRAMES],
2081 BEST_SEG_INFO *bsi_buf,
2082 int filter_idx,
2083 int mi_row, int mi_col) {
2084 int i;
2085 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
2086 MACROBLOCKD *xd = &x->e_mbd;
2087 MODE_INFO *mi = xd->mi_8x8[0];
2088 MB_MODE_INFO *mbmi = &mi->mbmi;
2089 int mode_idx;
2091 vp9_zero(*bsi);
2093 bsi->segment_rd = best_rd;
2094 bsi->ref_mv = best_ref_mv;
2095 bsi->second_ref_mv = second_best_ref_mv;
2096 bsi->mvp.as_int = best_ref_mv->as_int;
2097 bsi->mvthresh = mvthresh;
2099 for (i = 0; i < 4; i++)
2100 bsi->modes[i] = ZEROMV;
2102 rd_check_segment_txsize(cpi, x, tile, bsi_buf, filter_idx, seg_mvs,
2103 mi_row, mi_col);
2105 if (bsi->segment_rd > best_rd)
2106 return INT64_MAX;
2107 /* set it to the best */
2108 for (i = 0; i < 4; i++) {
2109 mode_idx = INTER_OFFSET(bsi->modes[i]);
2110 mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
2111 if (has_second_ref(mbmi))
2112 mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
2113 xd->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
2114 mi->bmi[i].as_mode = bsi->modes[i];
2115 }
2117 /*
2118 * used to set mbmi->mv.as_int
2119 */
2120 *returntotrate = bsi->r;
2121 *returndistortion = bsi->d;
2122 *returnyrate = bsi->segment_yrate;
2123 *skippable = vp9_is_skippable_in_plane(&x->e_mbd, BLOCK_8X8, 0);
2124 *psse = bsi->sse;
2125 mbmi->mode = bsi->modes[3];
2127 return bsi->segment_rd;
2128 }
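// Cheap SAD scan over the candidate reference mvs: records the index of the
// best full-pel candidate (mv_best_ref_index) and the largest mv magnitude
// seen (max_mv_context) for sizing subsequent searches.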
2130 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
2131 uint8_t *ref_y_buffer, int ref_y_stride,
2132 int ref_frame, BLOCK_SIZE block_size ) {
2133 MACROBLOCKD *xd = &x->e_mbd;
2134 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2135 int_mv this_mv;
2136 int i;
2137 int zero_seen = 0;
2138 int best_index = 0;
2139 int best_sad = INT_MAX;
2140 int this_sad = INT_MAX;
2141 unsigned int max_mv = 0;
2143 uint8_t *src_y_ptr = x->plane[0].src.buf;
2144 uint8_t *ref_y_ptr;
2145 int row_offset, col_offset;
2146 int num_mv_refs = MAX_MV_REF_CANDIDATES +
2147 (cpi->sf.adaptive_motion_search &&
2148 cpi->common.show_frame &&
2149 block_size < cpi->sf.max_partition_size);
2151 // Get the sad for each candidate reference mv
2152 for (i = 0; i < num_mv_refs; i++) {
2153 this_mv.as_int = (i < MAX_MV_REF_CANDIDATES) ?
2154 mbmi->ref_mvs[ref_frame][i].as_int : x->pred_mv[ref_frame].as_int;
2156 max_mv = MAX(max_mv,
2157 MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
2158 // The list is at an end if we see 0 for a second time.
2159 if (!this_mv.as_int && zero_seen)
2160 break;
2161 zero_seen = zero_seen || !this_mv.as_int;
2163 row_offset = this_mv.as_mv.row >> 3;
2164 col_offset = this_mv.as_mv.col >> 3;
2165 ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
2167 // Find sad for current vector.
2168 this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
2169 ref_y_ptr, ref_y_stride,
2170 0x7fffffff);
2172 // Note if it is the best so far.
2173 if (this_sad < best_sad) {
2174 best_sad = this_sad;
2175 best_index = i;
2176 }
2177 }
2179 // Note the index of the mv that worked best in the reference list.
2180 x->mv_best_ref_index[ref_frame] = best_index;
2181 x->max_mv_context[ref_frame] = max_mv;
2182 }
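// Converts the context-conditioned binary probabilities for intra/inter,
// compound/single and the reference-frame choices into bit costs. When the
// segment dictates the reference frame, no signaling is needed, so the
// costs are zeroed.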
2184 static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
2185 unsigned int *ref_costs_single,
2186 unsigned int *ref_costs_comp,
2187 vp9_prob *comp_mode_p) {
2188 VP9_COMMON *const cm = &cpi->common;
2189 MACROBLOCKD *const xd = &cpi->mb.e_mbd;
2190 int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
2191 SEG_LVL_REF_FRAME);
2192 if (seg_ref_active) {
2193 vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
2194 vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
2195 *comp_mode_p = 128;
2196 } else {
2197 vp9_prob intra_inter_p = vp9_get_pred_prob_intra_inter(cm, xd);
2198 vp9_prob comp_inter_p = 128;
2200 if (cm->comp_pred_mode == HYBRID_PREDICTION) {
2201 comp_inter_p = vp9_get_pred_prob_comp_inter_inter(cm, xd);
2202 *comp_mode_p = comp_inter_p;
2203 } else {
2204 *comp_mode_p = 128;
2205 }
2207 ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
2209 if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) {
2210 vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
2211 vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
2212 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2214 if (cm->comp_pred_mode == HYBRID_PREDICTION)
2215 base_cost += vp9_cost_bit(comp_inter_p, 0);
2217 ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
2218 ref_costs_single[ALTREF_FRAME] = base_cost;
2219 ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
2220 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2221 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2222 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
2223 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
2224 } else {
2225 ref_costs_single[LAST_FRAME] = 512;
2226 ref_costs_single[GOLDEN_FRAME] = 512;
2227 ref_costs_single[ALTREF_FRAME] = 512;
2228 }
2229 if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) {
2230 vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
2231 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2233 if (cm->comp_pred_mode == HYBRID_PREDICTION)
2234 base_cost += vp9_cost_bit(comp_inter_p, 1);
2236 ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0);
2237 ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
2238 } else {
2239 ref_costs_comp[LAST_FRAME] = 512;
2240 ref_costs_comp[GOLDEN_FRAME] = 512;
2241 }
2242 }
2243 }
2245 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
2246 int mode_index,
2247 int_mv *ref_mv,
2248 int_mv *second_ref_mv,
2249 int64_t comp_pred_diff[NB_PREDICTION_TYPES],
2250 int64_t tx_size_diff[TX_MODES],
2251 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) {
2252 MACROBLOCKD *const xd = &x->e_mbd;
2254 // Take a snapshot of the coding context so it can be
2255 // restored if we decide to encode this way
2256 ctx->skip = x->skip;
2257 ctx->best_mode_index = mode_index;
2258 ctx->mic = *xd->mi_8x8[0];
2260 ctx->best_ref_mv.as_int = ref_mv->as_int;
2261 ctx->second_best_ref_mv.as_int = second_ref_mv->as_int;
2263 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_PREDICTION_ONLY];
2264 ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
2265 ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];
2267 vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
2268 vpx_memcpy(ctx->best_filter_diff, best_filter_diff,
2269 sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
2270 }
2272 static void setup_pred_block(const MACROBLOCKD *xd,
2273 struct buf_2d dst[MAX_MB_PLANE],
2274 const YV12_BUFFER_CONFIG *src,
2275 int mi_row, int mi_col,
2276 const struct scale_factors *scale,
2277 const struct scale_factors *scale_uv) {
2278 int i;
2280 dst[0].buf = src->y_buffer;
2281 dst[0].stride = src->y_stride;
2282 dst[1].buf = src->u_buffer;
2283 dst[2].buf = src->v_buffer;
2284 dst[1].stride = dst[2].stride = src->uv_stride;
2285 #if CONFIG_ALPHA
2286 dst[3].buf = src->alpha_buffer;
2287 dst[3].stride = src->alpha_stride;
2288 #endif
2290 // TODO(jkoleszar): Make scale factors per-plane data
2291 for (i = 0; i < MAX_MB_PLANE; i++) {
2292 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
2293 i ? scale_uv : scale,
2294 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
2295 }
2296 }
2298 static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
2299 const TileInfo *const tile,
2300 int idx, MV_REFERENCE_FRAME frame_type,
2301 BLOCK_SIZE block_size,
2302 int mi_row, int mi_col,
2303 int_mv frame_nearest_mv[MAX_REF_FRAMES],
2304 int_mv frame_near_mv[MAX_REF_FRAMES],
2305 struct buf_2d yv12_mb[4][MAX_MB_PLANE],
2306 struct scale_factors scale[MAX_REF_FRAMES]) {
2307 VP9_COMMON *cm = &cpi->common;
2308 YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]];
2309 MACROBLOCKD *const xd = &x->e_mbd;
2310 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
2312 // set up scaling factors
2313 scale[frame_type] = cpi->common.active_ref_scale[frame_type - 1];
2315 scale[frame_type].sfc->set_scaled_offsets(&scale[frame_type],
2316 mi_row * MI_SIZE, mi_col * MI_SIZE);
2318 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
2319 // use the UV scaling factors.
2320 setup_pred_block(xd, yv12_mb[frame_type], yv12, mi_row, mi_col,
2321 &scale[frame_type], &scale[frame_type]);
2323 // Gets an initial list of candidate vectors from neighbours and orders them
2324 vp9_find_mv_refs(cm, xd, tile, xd->mi_8x8[0],
2325 xd->last_mi,
2326 frame_type,
2327 mbmi->ref_mvs[frame_type], mi_row, mi_col);
2329 // Candidate refinement carried out at encoder and decoder
2330 vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv,
2331 mbmi->ref_mvs[frame_type],
2332 &frame_nearest_mv[frame_type],
2333 &frame_near_mv[frame_type]);
2335 // Further refinement that is encode side only to test the top few candidates
2336 // in full and choose the best as the centre point for subsequent searches.
2337 // The current implementation doesn't support scaling.
2338 if (!vp9_is_scaled(scale[frame_type].sfc) && block_size >= BLOCK_8X8)
2339 mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride,
2340 frame_type, block_size);
2341 }
2343 static YV12_BUFFER_CONFIG *get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) {
2344 YV12_BUFFER_CONFIG *scaled_ref_frame = NULL;
2345 int fb = get_ref_frame_idx(cpi, ref_frame);
2346 int fb_scale = get_scale_ref_frame_idx(cpi, ref_frame);
2347 if (cpi->scaled_ref_idx[fb_scale] != cpi->common.ref_frame_map[fb])
2348 scaled_ref_frame = &cpi->common.yv12_fb[cpi->scaled_ref_idx[fb_scale]];
2349 return scaled_ref_frame;
2350 }
2352 static INLINE int get_switchable_rate(const MACROBLOCK *x) {
2353 const MACROBLOCKD *const xd = &x->e_mbd;
2354 const MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
2355 const int ctx = vp9_get_pred_context_switchable_interp(xd);
2356 return SWITCHABLE_INTERP_RATE_FACTOR *
2357 x->switchable_interp_costs[ctx][mbmi->interp_filter];
2358 }
2360 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2361 const TileInfo *const tile,
2362 BLOCK_SIZE bsize,
2363 int mi_row, int mi_col,
2364 int_mv *tmp_mv, int *rate_mv) {
2365 MACROBLOCKD *xd = &x->e_mbd;
2366 VP9_COMMON *cm = &cpi->common;
2367 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2368 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
2369 int bestsme = INT_MAX;
2370 int further_steps, step_param;
2371 int sadpb = x->sadperbit16;
2372 int_mv mvp_full;
2373 int ref = mbmi->ref_frame[0];
2374 int_mv ref_mv = mbmi->ref_mvs[ref][0];
2375 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
2377 int tmp_col_min = x->mv_col_min;
2378 int tmp_col_max = x->mv_col_max;
2379 int tmp_row_min = x->mv_row_min;
2380 int tmp_row_max = x->mv_row_max;
2382 YV12_BUFFER_CONFIG *scaled_ref_frame = get_scaled_ref_frame(cpi, ref);
2384 if (scaled_ref_frame) {
2385 int i;
2386 // Swap out the reference frame for a version that's been scaled to
2387 // match the resolution of the current frame, allowing the existing
2388 // motion search code to be used without additional modifications.
2389 for (i = 0; i < MAX_MB_PLANE; i++)
2390 backup_yv12[i] = xd->plane[i].pre[0];
2392 setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
2393 }
2395 vp9_clamp_mv_min_max(x, &ref_mv.as_mv);
2397 // Adjust search parameters based on small partitions' result.
2398 if (x->fast_ms) {
2399 // && abs(mvp_full.as_mv.row - x->pred_mv.as_mv.row) < 24 &&
2400 // abs(mvp_full.as_mv.col - x->pred_mv.as_mv.col) < 24) {
2401 // adjust search range
2402 step_param = 6;
2403 if (x->fast_ms > 1)
2404 step_param = 8;
2406 // Get prediction MV.
2407 mvp_full.as_int = x->pred_mv[ref].as_int;
2409 // Adjust MV sign if needed.
2410 if (cm->ref_frame_sign_bias[ref]) {
2411 mvp_full.as_mv.col *= -1;
2412 mvp_full.as_mv.row *= -1;
2413 }
2414 } else {
2415 // Work out the size of the first step in the mv step search.
2416 // 0 here is maximum length first step. 1 is MAX >> 1 etc.
2417 if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
2418 // Take a weighted average of the step_params based on the last
2419 // frame's max mv magnitude and that based on the best ref mvs of the
2420 // current block for the given reference.
2421 step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) +
2422 cpi->mv_step_param) >> 1;
2423 } else {
2424 step_param = cpi->mv_step_param;
2425 }
2426 }
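// Smaller partitions get a larger step_param below, i.e. a shorter maximum
// first step, which keeps the search concentrated around the predictor.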
2428 if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 &&
2429 cpi->common.show_frame) {
2430 int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
2431 b_width_log2(bsize)));
2432 step_param = MAX(step_param, boffset);
2433 }
2435 mvp_full.as_int = x->mv_best_ref_index[ref] < MAX_MV_REF_CANDIDATES ?
2436 mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int :
2437 x->pred_mv[ref].as_int;
2439 mvp_full.as_mv.col >>= 3;
2440 mvp_full.as_mv.row >>= 3;
2442 // Further step/diamond searches as necessary
2443 further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
2445 if (cpi->sf.search_method == HEX) {
2446 bestsme = vp9_hex_search(x, &mvp_full.as_mv,
2447 step_param,
2448 sadpb, 1,
2449 &cpi->fn_ptr[block_size], 1,
2450 &ref_mv.as_mv, &tmp_mv->as_mv);
2451 } else if (cpi->sf.search_method == SQUARE) {
2452 bestsme = vp9_square_search(x, &mvp_full.as_mv,
2453 step_param,
2454 sadpb, 1,
2455 &cpi->fn_ptr[block_size], 1,
2456 &ref_mv.as_mv, &tmp_mv->as_mv);
2457 } else if (cpi->sf.search_method == BIGDIA) {
2458 bestsme = vp9_bigdia_search(x, &mvp_full.as_mv,
2459 step_param,
2460 sadpb, 1,
2461 &cpi->fn_ptr[block_size], 1,
2462 &ref_mv.as_mv, &tmp_mv->as_mv);
2463 } else {
2464 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
2465 sadpb, further_steps, 1,
2466 &cpi->fn_ptr[block_size],
2467 &ref_mv, tmp_mv);
2468 }
2470 x->mv_col_min = tmp_col_min;
2471 x->mv_col_max = tmp_col_max;
2472 x->mv_row_min = tmp_row_min;
2473 x->mv_row_max = tmp_row_max;
2475 if (bestsme < INT_MAX) {
2476 int dis; /* TODO: use dis in distortion calculation later. */
2477 unsigned int sse;
2478 cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv.as_mv,
2479 cm->allow_high_precision_mv,
2480 x->errorperbit,
2481 &cpi->fn_ptr[block_size],
2482 0, cpi->sf.subpel_iters_per_step,
2483 x->nmvjointcost, x->mvcost,
2484 &dis, &sse);
2485 }
2486 *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv.as_mv,
2487 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2489 if (cpi->sf.adaptive_motion_search && cpi->common.show_frame)
2490 x->pred_mv[ref].as_int = tmp_mv->as_int;
2492 if (scaled_ref_frame) {
2493 int i;
2494 for (i = 0; i < MAX_MB_PLANE; i++)
2495 xd->plane[i].pre[0] = backup_yv12[i];
2496 }
2497 }
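// Alternating refinement for compound prediction: hold one reference's mv
// fixed, build its prediction (second_pred), run a small-range search on the
// other reference against that compound target, then swap roles. Up to four
// iterations; stop as soon as an iteration fails to improve.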
2499 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2500 BLOCK_SIZE bsize,
2501 int_mv *frame_mv,
2502 int mi_row, int mi_col,
2503 int_mv single_newmv[MAX_REF_FRAMES],
2504 int *rate_mv) {
2505 int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize);
2506 MACROBLOCKD *xd = &x->e_mbd;
2507 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2508 const int refs[2] = { mbmi->ref_frame[0],
2509 mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
2510 int_mv ref_mv[2];
2511 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
2512 int ite, ref;
2513 // Prediction buffer from second frame.
2514 uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
2516 // Do joint motion search in compound mode to get more accurate mv.
2517 struct buf_2d backup_yv12[2][MAX_MB_PLANE];
2518 struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0];
2519 int last_besterr[2] = {INT_MAX, INT_MAX};
2520 YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
2521 get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
2522 get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
2523 };
2525 for (ref = 0; ref < 2; ++ref) {
2526 ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];
2528 if (scaled_ref_frame[ref]) {
2529 int i;
2530 // Swap out the reference frame for a version that's been scaled to
2531 // match the resolution of the current frame, allowing the existing
2532 // motion search code to be used without additional modifications.
2533 for (i = 0; i < MAX_MB_PLANE; i++)
2534 backup_yv12[ref][i] = xd->plane[i].pre[ref];
2535 setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, NULL);
2536 }
2538 xd->scale_factor[ref].sfc->set_scaled_offsets(&xd->scale_factor[ref],
2539 mi_row, mi_col);
2540 frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
2541 }
2543 // Allow the joint search to run iteratively multiple times for each ref
2544 // frame, and break out of the search loop if it cannot find a better mv.
2545 for (ite = 0; ite < 4; ite++) {
2546 struct buf_2d ref_yv12[2];
2547 int bestsme = INT_MAX;
2548 int sadpb = x->sadperbit16;
2549 int_mv tmp_mv;
2550 int search_range = 3;
2552 int tmp_col_min = x->mv_col_min;
2553 int tmp_col_max = x->mv_col_max;
2554 int tmp_row_min = x->mv_row_min;
2555 int tmp_row_max = x->mv_row_max;
2556 int id = ite % 2;
2558 // Initialized here because of a compiler problem in Visual Studio.
2559 ref_yv12[0] = xd->plane[0].pre[0];
2560 ref_yv12[1] = xd->plane[0].pre[1];
2562 // Get pred block from second frame.
2563 vp9_build_inter_predictor(ref_yv12[!id].buf,
2564 ref_yv12[!id].stride,
2565 second_pred, pw,
2566 &frame_mv[refs[!id]].as_mv,
2567 &xd->scale_factor[!id],
2568 pw, ph, 0,
2569 &xd->subpix, MV_PRECISION_Q3);
2571 // Compound motion search on first ref frame.
2572 if (id)
2573 xd->plane[0].pre[0] = ref_yv12[id];
2574 vp9_clamp_mv_min_max(x, &ref_mv[id].as_mv);
2576 // Use mv result from single mode as mvp.
2577 tmp_mv.as_int = frame_mv[refs[id]].as_int;
2579 tmp_mv.as_mv.col >>= 3;
2580 tmp_mv.as_mv.row >>= 3;
2582 // Small-range full-pixel motion search
2583 bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
2584 search_range,
2585 &cpi->fn_ptr[block_size],
2586 x->nmvjointcost, x->mvcost,
2587 &ref_mv[id], second_pred,
2588 pw, ph);
2590 x->mv_col_min = tmp_col_min;
2591 x->mv_col_max = tmp_col_max;
2592 x->mv_row_min = tmp_row_min;
2593 x->mv_row_max = tmp_row_max;
2595 if (bestsme < INT_MAX) {
2596 int dis; /* TODO: use dis in distortion calculation later. */
2597 unsigned int sse;
2599 bestsme = cpi->find_fractional_mv_step_comp(
2600 x, &tmp_mv.as_mv,
2601 &ref_mv[id].as_mv,
2602 cpi->common.allow_high_precision_mv,
2603 x->errorperbit,
2604 &cpi->fn_ptr[block_size],
2605 0, cpi->sf.subpel_iters_per_step,
2606 x->nmvjointcost, x->mvcost,
2607 &dis, &sse, second_pred,
2608 pw, ph);
2609 }
2611 if (id)
2612 xd->plane[0].pre[0] = scaled_first_yv12;
2614 if (bestsme < last_besterr[id]) {
2615 frame_mv[refs[id]].as_int = tmp_mv.as_int;
2616 last_besterr[id] = bestsme;
2617 } else {
2618 break;
2619 }
2620 }
2622 *rate_mv = 0;
2624 for (ref = 0; ref < 2; ++ref) {
2625 if (scaled_ref_frame[ref]) {
2626 // restore the predictor
2627 int i;
2628 for (i = 0; i < MAX_MB_PLANE; i++)
2629 xd->plane[i].pre[ref] = backup_yv12[ref][i];
2630 }
2632 *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
2633 &mbmi->ref_mvs[refs[ref]][0].as_mv,
2634 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2635 }
2637 vpx_free(second_pred);
2638 }
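// Full rd evaluation of a single inter mode: runs any required motion
// search, prunes redundant zero-mv duplicates, picks the interpolation
// filter, applies the encode/rd breakouts, then measures Y and UV
// rate/distortion.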
2640 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
2641 const TileInfo *const tile,
2642 BLOCK_SIZE bsize,
2643 int64_t txfm_cache[],
2644 int *rate2, int64_t *distortion,
2645 int *skippable,
2646 int *rate_y, int64_t *distortion_y,
2647 int *rate_uv, int64_t *distortion_uv,
2648 int *mode_excluded, int *disable_skip,
2649 INTERPOLATION_TYPE *best_filter,
2650 int_mv (*mode_mv)[MAX_REF_FRAMES],
2651 int mi_row, int mi_col,
2652 int_mv single_newmv[MAX_REF_FRAMES],
2653 int64_t *psse,
2654 const int64_t ref_best_rd) {
2655 VP9_COMMON *cm = &cpi->common;
2656 MACROBLOCKD *xd = &x->e_mbd;
2657 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2658 const int is_comp_pred = has_second_ref(mbmi);
2659 const int num_refs = is_comp_pred ? 2 : 1;
2660 const int this_mode = mbmi->mode;
2661 int_mv *frame_mv = mode_mv[this_mode];
2662 int i;
2663 int refs[2] = { mbmi->ref_frame[0],
2664 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2665 int_mv cur_mv[2];
2666 int64_t this_rd = 0;
2667 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
2668 int pred_exists = 0;
2669 int intpel_mv;
2670 int64_t rd, best_rd = INT64_MAX;
2671 int best_needs_copy = 0;
2672 uint8_t *orig_dst[MAX_MB_PLANE];
2673 int orig_dst_stride[MAX_MB_PLANE];
2674 int rs = 0;
2676 if (is_comp_pred) {
2677 if (frame_mv[refs[0]].as_int == INVALID_MV ||
2678 frame_mv[refs[1]].as_int == INVALID_MV)
2679 return INT64_MAX;
2680 }
2682 if (this_mode == NEWMV) {
2683 int rate_mv;
2684 if (is_comp_pred) {
2685 // Initialize mv using single prediction mode result.
2686 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2687 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2689 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2690 joint_motion_search(cpi, x, bsize, frame_mv,
2691 mi_row, mi_col, single_newmv, &rate_mv);
2692 } else {
2693 rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
2694 &mbmi->ref_mvs[refs[0]][0].as_mv,
2695 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2696 rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
2697 &mbmi->ref_mvs[refs[1]][0].as_mv,
2698 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2699 }
2700 *rate2 += rate_mv;
2701 } else {
2702 int_mv tmp_mv;
2703 single_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
2704 &tmp_mv, &rate_mv);
2705 *rate2 += rate_mv;
2706 frame_mv[refs[0]].as_int =
2707 xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
2708 single_newmv[refs[0]].as_int = tmp_mv.as_int;
2709 }
2710 }
2712 // NEAR/NEAREST with a (0,0) mv duplicates ZEROMV; prune this mode if it costs more to signal than the duplicate.
2713 if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
2714 frame_mv[refs[0]].as_int == 0 &&
2715 !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
2716 (num_refs == 1 || frame_mv[refs[1]].as_int == 0)) {
2717 int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
2718 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
2719 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
2720 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
2722 if (this_mode == NEARMV) {
2723 if (c1 > c3)
2724 return INT64_MAX;
2725 } else if (this_mode == NEARESTMV) {
2726 if (c2 > c3)
2727 return INT64_MAX;
2728 } else {
2729 assert(this_mode == ZEROMV);
2730 if (num_refs == 1) {
2731 if ((c3 >= c2 &&
2732 mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
2733 (c3 >= c1 &&
2734 mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
2735 return INT64_MAX;
2736 } else {
2737 if ((c3 >= c2 &&
2738 mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
2739 mode_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
2740 (c3 >= c1 &&
2741 mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
2742 mode_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
2743 return INT64_MAX;
2744 }
2745 }
2746 }
2748 for (i = 0; i < num_refs; ++i) {
2749 cur_mv[i] = frame_mv[refs[i]];
2750 // Clip "next_nearest" so that it does not extend too far out of the image.
2751 if (this_mode != NEWMV)
2752 clamp_mv2(&cur_mv[i].as_mv, xd);
2754 if (mv_check_bounds(x, &cur_mv[i]))
2755 return INT64_MAX;
2756 mbmi->mv[i].as_int = cur_mv[i].as_int;
2757 }
2759 // do first prediction into the destination buffer. Do the next
2760 // prediction into a temporary buffer. Then keep track of which one
2761 // of these currently holds the best predictor, and use the other
2762 // one for future predictions. In the end, copy from tmp_buf to
2763 // dst if necessary.
2764 for (i = 0; i < MAX_MB_PLANE; i++) {
2765 orig_dst[i] = xd->plane[i].dst.buf;
2766 orig_dst_stride[i] = xd->plane[i].dst.stride;
2767 }
2769 /* We don't include the cost of the second reference here, because there
2770 * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
2771 * words if you present them in that order, the second one is always known
2772 * if the first is known */
2773 *rate2 += cost_mv_ref(cpi, this_mode,
2774 mbmi->mode_context[mbmi->ref_frame[0]]);
2776 if (!(*mode_excluded)) {
2777 if (is_comp_pred) {
2778 *mode_excluded = (cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY);
2779 } else {
2780 *mode_excluded = (cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY);
2781 }
2782 }
2784 pred_exists = 0;
2785 // Are all MVs integer pel for Y and UV?
2786 intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 &&
2787 (mbmi->mv[0].as_mv.col & 15) == 0;
2788 if (is_comp_pred)
2789 intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 &&
2790 (mbmi->mv[1].as_mv.col & 15) == 0;
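// When every mv is integer-pel, all interpolation filters produce the same
// prediction, so the rate/distortion measured for the first filter is
// reused for the rest (tmp_rate_sum / tmp_dist_sum below).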
2791 // Search for best switchable filter by checking the variance of
2792 // pred error irrespective of whether the filter will be used
2793 if (cm->mcomp_filter_type != BILINEAR) {
2794 *best_filter = EIGHTTAP;
2795 if (x->source_variance <
2796 cpi->sf.disable_filter_search_var_thresh) {
2797 *best_filter = EIGHTTAP;
2798 vp9_zero(cpi->rd_filter_cache);
2799 } else {
2800 int i, newbest;
2801 int tmp_rate_sum = 0;
2802 int64_t tmp_dist_sum = 0;
2804 cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX;
2805 for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
2806 int j;
2807 int64_t rs_rd;
2808 mbmi->interp_filter = i;
2809 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
2810 rs = get_switchable_rate(x);
2811 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
2813 if (i > 0 && intpel_mv) {
2814 cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
2815 tmp_rate_sum, tmp_dist_sum);
2816 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
2817 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
2818 cpi->rd_filter_cache[i] + rs_rd);
2819 rd = cpi->rd_filter_cache[i];
2820 if (cm->mcomp_filter_type == SWITCHABLE)
2821 rd += rs_rd;
2822 } else {
2823 int rate_sum = 0;
2824 int64_t dist_sum = 0;
2825 if ((cm->mcomp_filter_type == SWITCHABLE &&
2826 (!i || best_needs_copy)) ||
2827 (cm->mcomp_filter_type != SWITCHABLE &&
2828 (cm->mcomp_filter_type == mbmi->interp_filter ||
2829 (i == 0 && intpel_mv)))) {
2830 for (j = 0; j < MAX_MB_PLANE; j++) {
2831 xd->plane[j].dst.buf = orig_dst[j];
2832 xd->plane[j].dst.stride = orig_dst_stride[j];
2833 }
2834 } else {
2835 for (j = 0; j < MAX_MB_PLANE; j++) {
2836 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
2837 xd->plane[j].dst.stride = 64;
2838 }
2839 }
2840 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2841 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
2842 cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
2843 rate_sum, dist_sum);
2844 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
2845 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
2846 cpi->rd_filter_cache[i] + rs_rd);
2847 rd = cpi->rd_filter_cache[i];
2848 if (cm->mcomp_filter_type == SWITCHABLE)
2849 rd += rs_rd;
2850 if (i == 0 && intpel_mv) {
2851 tmp_rate_sum = rate_sum;
2852 tmp_dist_sum = dist_sum;
2853 }
2854 }
2855 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2856 if (rd / 2 > ref_best_rd) {
2857 for (i = 0; i < MAX_MB_PLANE; i++) {
2858 xd->plane[i].dst.buf = orig_dst[i];
2859 xd->plane[i].dst.stride = orig_dst_stride[i];
2860 }
2861 return INT64_MAX;
2862 }
2863 }
2864 newbest = i == 0 || rd < best_rd;
2866 if (newbest) {
2867 best_rd = rd;
2868 *best_filter = mbmi->interp_filter;
2869 if (cm->mcomp_filter_type == SWITCHABLE && i && !intpel_mv)
2870 best_needs_copy = !best_needs_copy;
2871 }
2873 if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
2874 (cm->mcomp_filter_type != SWITCHABLE &&
2875 cm->mcomp_filter_type == mbmi->interp_filter)) {
2876 pred_exists = 1;
2877 }
2878 }
2880 for (i = 0; i < MAX_MB_PLANE; i++) {
2881 xd->plane[i].dst.buf = orig_dst[i];
2882 xd->plane[i].dst.stride = orig_dst_stride[i];
2883 }
2884 }
2885 }
2886 // Set the appropriate filter
2887 mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
2888 cm->mcomp_filter_type : *best_filter;
2889 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
2890 rs = cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(x) : 0;
2892 if (pred_exists) {
2893 if (best_needs_copy) {
2894 // again temporarily set the buffers to local memory to prevent a memcpy
2895 for (i = 0; i < MAX_MB_PLANE; i++) {
2896 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
2897 xd->plane[i].dst.stride = 64;
2898 }
2899 }
2900 } else {
2901 // Handles the special case when a filter that is not in the
2902 // switchable list (e.g. bilinear, 6-tap) is indicated at the frame level.
2903 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2904 }
2907 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2908 int tmp_rate;
2909 int64_t tmp_dist;
2910 model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
2911 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
2912 // If the current modeled pred_error rd is substantially more than the
2913 // best so far, do not bother doing a full rd evaluation.
2914 if (rd / 2 > ref_best_rd) {
2915 for (i = 0; i < MAX_MB_PLANE; i++) {
2916 xd->plane[i].dst.buf = orig_dst[i];
2917 xd->plane[i].dst.stride = orig_dst_stride[i];
2918 }
2919 return INT64_MAX;
2920 }
2921 }
2923 if (cpi->common.mcomp_filter_type == SWITCHABLE)
2924 *rate2 += get_switchable_rate(x);
2926 if (!is_comp_pred && cpi->enable_encode_breakout) {
2927 if (cpi->active_map_enabled && x->active_ptr[0] == 0)
2928 x->skip = 1;
2929 else if (x->encode_breakout) {
2930 const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
2931 const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
2932 unsigned int var, sse;
2933 // Skipping threshold for ac.
2934 unsigned int thresh_ac;
2935 // The encode_breakout input
2936 unsigned int encode_breakout = x->encode_breakout << 4;
2937 unsigned int max_thresh = 36000;
2939 // Use an extremely low threshold for static frames to limit skipping.
2940 if (cpi->enable_encode_breakout == 2)
2941 max_thresh = 128;
2943 // Calculate threshold according to dequant value.
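// dequant[1] is the AC dequantizer step, so the value below is
// roughly (step / 3)^2.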
2944 thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
2946 // Use encode_breakout input if it is bigger than internal threshold.
2947 if (thresh_ac < encode_breakout)
2948 thresh_ac = encode_breakout;
2950 // Set a maximum for threshold to avoid big PSNR loss in low bitrate case.
2951 if (thresh_ac > max_thresh)
2952 thresh_ac = max_thresh;
2954 var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
2955 xd->plane[0].dst.buf,
2956 xd->plane[0].dst.stride, &sse);
2958 // Adjust threshold according to partition size.
2959 thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
2960 b_height_log2_lookup[bsize]);
2962 // Y skipping condition checking
2963 if (sse < thresh_ac || sse == 0) {
2964 // Skipping threshold for dc
2965 unsigned int thresh_dc;
2967 thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
2969 // dc skipping checking
2970 if ((sse - var) < thresh_dc || sse == var) {
2971 unsigned int sse_u, sse_v;
2972 unsigned int var_u, var_v;
2974 var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
2975 x->plane[1].src.stride,
2976 xd->plane[1].dst.buf,
2977 xd->plane[1].dst.stride, &sse_u);
2979 // U skipping condition checking
2980 if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
2981 (sse_u - var_u < thresh_dc || sse_u == var_u)) {
2982 var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
2983 x->plane[2].src.stride,
2984 xd->plane[2].dst.buf,
2985 xd->plane[2].dst.stride, &sse_v);
2987 // V skipping condition checking
2988 if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
2989 (sse_v - var_v < thresh_dc || sse_v == var_v)) {
2990 x->skip = 1;
2992 // The cost of skip bit needs to be added.
2993 *rate2 += vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1);
2995 // Scaling factor for SSE from spatial domain to frequency domain
2996 // is 16. Adjust distortion accordingly.
2997 *distortion_uv = (sse_u + sse_v) << 4;
2998 *distortion = (sse << 4) + *distortion_uv;
3000 *disable_skip = 1;
3001 this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
3002 }
3003 }
3004 }
3005 }
3006 }
3007 }
3009 if (!x->skip) {
3010 int skippable_y, skippable_uv;
3011 int64_t sseuv = INT64_MAX;
3012 int64_t rdcosty = INT64_MAX;
3014 // Y cost and distortion
3015 super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
3016 bsize, txfm_cache, ref_best_rd);
3018 if (*rate_y == INT_MAX) {
3019 *rate2 = INT_MAX;
3020 *distortion = INT64_MAX;
3021 for (i = 0; i < MAX_MB_PLANE; i++) {
3022 xd->plane[i].dst.buf = orig_dst[i];
3023 xd->plane[i].dst.stride = orig_dst_stride[i];
3024 }
3025 return INT64_MAX;
3026 }
3028 *rate2 += *rate_y;
3029 *distortion += *distortion_y;
3031 rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
3032 rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
3034 super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
3035 bsize, ref_best_rd - rdcosty);
3036 if (*rate_uv == INT_MAX) {
3037 *rate2 = INT_MAX;
3038 *distortion = INT64_MAX;
3039 for (i = 0; i < MAX_MB_PLANE; i++) {
3040 xd->plane[i].dst.buf = orig_dst[i];
3041 xd->plane[i].dst.stride = orig_dst_stride[i];
3042 }
3043 return INT64_MAX;
3044 }
3046 *psse += sseuv;
3047 *rate2 += *rate_uv;
3048 *distortion += *distortion_uv;
3049 *skippable = skippable_y && skippable_uv;
3050 }
3052 for (i = 0; i < MAX_MB_PLANE; i++) {
3053 xd->plane[i].dst.buf = orig_dst[i];
3054 xd->plane[i].dst.stride = orig_dst_stride[i];
3055 }
3057 return this_rd; // if 0, this will be re-calculated by caller
3058 }
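// Double-buffer swap: exchanges the macroblock's coefficient buffers with
// the snapshot held in the pick-mode context, leaving the context owning
// the best mode's coefficients without copying any data.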
3060 static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
3061 int max_plane) {
3062 struct macroblock_plane *const p = x->plane;
3063 struct macroblockd_plane *const pd = x->e_mbd.plane;
3064 int i;
3066 for (i = 0; i < max_plane; ++i) {
3067 p[i].coeff = ctx->coeff_pbuf[i][1];
3068 pd[i].qcoeff = ctx->qcoeff_pbuf[i][1];
3069 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
3070 pd[i].eobs = ctx->eobs_pbuf[i][1];
3072 ctx->coeff_pbuf[i][1] = ctx->coeff_pbuf[i][0];
3073 ctx->qcoeff_pbuf[i][1] = ctx->qcoeff_pbuf[i][0];
3074 ctx->dqcoeff_pbuf[i][1] = ctx->dqcoeff_pbuf[i][0];
3075 ctx->eobs_pbuf[i][1] = ctx->eobs_pbuf[i][0];
3077 ctx->coeff_pbuf[i][0] = p[i].coeff;
3078 ctx->qcoeff_pbuf[i][0] = pd[i].qcoeff;
3079 ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
3080 ctx->eobs_pbuf[i][0] = pd[i].eobs;
3081 }
3082 }
3084 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3085 int *returnrate, int64_t *returndist,
3086 BLOCK_SIZE bsize,
3087 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
3088 VP9_COMMON *const cm = &cpi->common;
3089 MACROBLOCKD *const xd = &x->e_mbd;
3090 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
3091 int y_skip = 0, uv_skip = 0;
3092 int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
3093 x->skip_encode = 0;
3094 ctx->skip = 0;
3095 xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME;
3096 if (bsize >= BLOCK_8X8) {
3097 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3098 &dist_y, &y_skip, bsize, tx_cache,
3099 best_rd) >= best_rd) {
3100 *returnrate = INT_MAX;
3101 return;
3102 }
3103 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
3104 &dist_uv, &uv_skip, bsize);
3105 } else {
3106 y_skip = 0;
3107 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3108 &dist_y, best_rd) >= best_rd) {
3109 *returnrate = INT_MAX;
3110 return;
3111 }
3112 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
3113 &dist_uv, &uv_skip, BLOCK_8X8);
3114 }
3116 if (y_skip && uv_skip) {
3117 *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
3118 vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 1);
3119 *returndist = dist_y + dist_uv;
3120 vp9_zero(ctx->tx_rd_diff);
3121 } else {
3122 int i;
3123 *returnrate = rate_y + rate_uv +
3124 vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 0);
3125 *returndist = dist_y + dist_uv;
3126 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
3127 for (i = 0; i < TX_MODES; i++) {
3128 if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)
3129 ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
3130 else
3131 ctx->tx_rd_diff[i] = 0;
3132 }
3133 }
3135 ctx->mic = *xd->mi_8x8[0];
3136 }
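// Exhaustive rd mode decision for an inter-frame block: every entry in
// vp9_mode_order is evaluated, subject to speed-feature pruning, and the
// choice with the lowest rate-distortion cost is kept.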
3138 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3139 const TileInfo *const tile,
3140 int mi_row, int mi_col,
3141 int *returnrate,
3142 int64_t *returndistortion,
3143 BLOCK_SIZE bsize,
3144 PICK_MODE_CONTEXT *ctx,
3145 int64_t best_rd_so_far) {
3146 VP9_COMMON *cm = &cpi->common;
3147 MACROBLOCKD *xd = &x->e_mbd;
3148 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
3149 const struct segmentation *seg = &cm->seg;
3150 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
3151 MB_PREDICTION_MODE this_mode;
3152 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3153 unsigned char segment_id = mbmi->segment_id;
3154 int comp_pred, i;
3155 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3156 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3157 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
3158 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3159 VP9_ALT_FLAG };
3160 int idx_list[4] = {0,
3161 cpi->lst_fb_idx,
3162 cpi->gld_fb_idx,
3163 cpi->alt_fb_idx};
3164 int64_t best_rd = best_rd_so_far;
3165 int64_t best_tx_rd[TX_MODES];
3166 int64_t best_tx_diff[TX_MODES];
3167 int64_t best_pred_diff[NB_PREDICTION_TYPES];
3168 int64_t best_pred_rd[NB_PREDICTION_TYPES];
3169 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3170 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3171 MB_MODE_INFO best_mbmode = { 0 };
3172 int j;
3173 int mode_index, best_mode_index = 0;
3174 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3175 vp9_prob comp_mode_p;
3176 int64_t best_intra_rd = INT64_MAX;
3177 int64_t best_inter_rd = INT64_MAX;
3178 MB_PREDICTION_MODE best_intra_mode = DC_PRED;
3179 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3180 INTERPOLATION_TYPE tmp_best_filter = SWITCHABLE;
3181 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3182 int64_t dist_uv[TX_SIZES];
3183 int skip_uv[TX_SIZES];
3184 MB_PREDICTION_MODE mode_uv[TX_SIZES];
3185 struct scale_factors scale_factor[4];
3186 unsigned int ref_frame_mask = 0;
3187 unsigned int mode_mask = 0;
3188 int64_t mode_distortions[MB_MODE_COUNT] = {-1};
3189 int64_t frame_distortions[MAX_REF_FRAMES] = {-1};
3190 int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
3191 const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
3192 const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
3193 int best_skip2 = 0;
3195 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3197 // Wherever the flag is set, the error is much higher than that of its neighbors.
3198 ctx->frames_with_high_error = 0;
3199 ctx->modes_with_high_error = 0;
3201 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
3202 &comp_mode_p);
3204 for (i = 0; i < NB_PREDICTION_TYPES; ++i)
3205 best_pred_rd[i] = INT64_MAX;
3206 for (i = 0; i < TX_MODES; i++)
3207 best_tx_rd[i] = INT64_MAX;
3208 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3209 best_filter_rd[i] = INT64_MAX;
3210 for (i = 0; i < TX_SIZES; i++)
3211 rate_uv_intra[i] = INT_MAX;
3213 *returnrate = INT_MAX;
3215 // Create a mask set to 1 for each reference frame used by a smaller
3216 // resolution.
3217 if (cpi->sf.use_avoid_tested_higherror) {
3218 switch (block_size) {
3219 case BLOCK_64X64:
3220 for (i = 0; i < 4; i++) {
3221 for (j = 0; j < 4; j++) {
3222 ref_frame_mask |= x->mb_context[i][j].frames_with_high_error;
3223 mode_mask |= x->mb_context[i][j].modes_with_high_error;
3224 }
3225 }
3226 for (i = 0; i < 4; i++) {
3227 ref_frame_mask |= x->sb32_context[i].frames_with_high_error;
3228 mode_mask |= x->sb32_context[i].modes_with_high_error;
3229 }
3230 break;
3231 case BLOCK_32X32:
3232 for (i = 0; i < 4; i++) {
3233 ref_frame_mask |=
3234 x->mb_context[x->sb_index][i].frames_with_high_error;
3235 mode_mask |= x->mb_context[x->sb_index][i].modes_with_high_error;
3236 }
3237 break;
3238 default:
3239 // Until we handle all block sizes, leave the masks clear (exclude nothing).
3240 ref_frame_mask = 0;
3241 mode_mask = 0;
3242 break;
3243 }
3244 ref_frame_mask = ~ref_frame_mask;
3245 mode_mask = ~mode_mask;
3246 }
3248 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3249 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3250 setup_buffer_inter(cpi, x, tile, idx_list[ref_frame], ref_frame,
3251 block_size, mi_row, mi_col,
3252 frame_mv[NEARESTMV], frame_mv[NEARMV],
3253 yv12_mb, scale_factor);
3254 }
3255 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3256 frame_mv[ZEROMV][ref_frame].as_int = 0;
3257 }
3259 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3260 int mode_excluded = 0;
3261 int64_t this_rd = INT64_MAX;
3262 int disable_skip = 0;
3263 int compmode_cost = 0;
3264 int rate2 = 0, rate_y = 0, rate_uv = 0;
3265 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3266 int skippable = 0;
3267 int64_t tx_cache[TX_MODES];
3268 int i;
3269 int this_skip2 = 0;
3270 int64_t total_sse = INT_MAX;
3271 int early_term = 0;
3273 for (i = 0; i < TX_MODES; ++i)
3274 tx_cache[i] = INT64_MAX;
3276 x->skip = 0;
3277 this_mode = vp9_mode_order[mode_index].mode;
3278 ref_frame = vp9_mode_order[mode_index].ref_frame;
3279 second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
3281 // Look at the reference frame of the best mode so far and set the
3282 // skip mask to look at a subset of the remaining modes.
3283 if (mode_index > cpi->sf.mode_skip_start) {
3284 if (mode_index == (cpi->sf.mode_skip_start + 1)) {
3285 switch (vp9_mode_order[best_mode_index].ref_frame) {
3286 case INTRA_FRAME:
3287 cpi->mode_skip_mask = 0;
3288 break;
3289 case LAST_FRAME:
3290 cpi->mode_skip_mask = LAST_FRAME_MODE_MASK;
3291 break;
3292 case GOLDEN_FRAME:
3293 cpi->mode_skip_mask = GOLDEN_FRAME_MODE_MASK;
3294 break;
3295 case ALTREF_FRAME:
3296 cpi->mode_skip_mask = ALT_REF_MODE_MASK;
3297 break;
3298 case NONE:
3299 case MAX_REF_FRAMES:
3300 assert(!"Invalid Reference frame");
3301 }
3302 }
3303 if (cpi->mode_skip_mask & ((int64_t)1 << mode_index))
3304 continue;
3305 }
3307 // Skip if the current reference frame has been masked off
3308 if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask &&
3309 (cpi->ref_frame_mask & (1 << ref_frame)))
3310 continue;
3312 // Test best rd so far against threshold for trying this mode.
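// rd_thresh_freq_fact adaptively scales the threshold; with the >> 5 it
// acts as a fixed-point factor where 32 (presumably its initial value)
// is unity.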
3313 if ((best_rd < ((int64_t)cpi->rd_threshes[segment_id][bsize][mode_index] *
3314 cpi->rd_thresh_freq_fact[bsize][mode_index] >> 5)) ||
3315 cpi->rd_threshes[segment_id][bsize][mode_index] == INT_MAX)
3316 continue;
3318 // Do not allow compound prediction if the segment level reference
3319 // frame feature is in use as in this case there can only be one reference.
3320 if ((second_ref_frame > INTRA_FRAME) &&
3321 vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3322 continue;
3324 // Skip some checking based on small partitions' result.
3325 if (x->fast_ms > 1 && !ref_frame)
3326 continue;
3327 if (x->fast_ms > 2 && ref_frame != x->subblock_ref)
3328 continue;
3330 if (cpi->sf.use_avoid_tested_higherror && bsize >= BLOCK_8X8) {
3331 if (!(ref_frame_mask & (1 << ref_frame))) {
3332 continue;
3333 }
3334 if (!(mode_mask & (1 << this_mode))) {
3335 continue;
3336 }
3337 if (second_ref_frame != NONE
3338 && !(ref_frame_mask & (1 << second_ref_frame))) {
3339 continue;
3340 }
3341 }
3343 mbmi->ref_frame[0] = ref_frame;
3344 mbmi->ref_frame[1] = second_ref_frame;
3346 if (!(ref_frame == INTRA_FRAME
3347 || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
3348 continue;
3349 }
3350 if (!(second_ref_frame == NONE
3351 || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
3352 continue;
3353 }
3355 comp_pred = second_ref_frame > INTRA_FRAME;
3356 if (comp_pred) {
3357 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
3358 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME)
3359 continue;
3360 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
3361 if (ref_frame != best_inter_ref_frame &&
3362 second_ref_frame != best_inter_ref_frame)
3363 continue;
3364 }
3366 set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
3367 mbmi->uv_mode = DC_PRED;
3369 // Evaluate all sub-pel filters irrespective of whether we can use
3370 // them for this frame.
3371 mbmi->interp_filter = cm->mcomp_filter_type;
3372 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
3374 if (comp_pred) {
3375 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3376 continue;
3377 set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
3379 mode_excluded = mode_excluded
3380 ? mode_excluded
3381 : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
3382 } else {
3383 if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
3384 mode_excluded =
3385 mode_excluded ?
3386 mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY;
3387 }
3388 }
3390 // Select prediction reference frames.
3391 for (i = 0; i < MAX_MB_PLANE; i++) {
3392 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3393 if (comp_pred)
3394 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3395 }
3397 // If the segment reference frame feature is enabled, then skip this
3398 // mode if the current ref frame is not allowed for this segment.
3399 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3400 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
3401 (int)ref_frame) {
3402 continue;
3403 // If the segment skip feature is enabled, then skip this mode
3404 // unless it is ZEROMV or uses an intra reference.
3405 } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
3406 (this_mode != ZEROMV && ref_frame != INTRA_FRAME)) {
3407 continue;
3408 // Disable this drop out case if the ref frame
3409 // segment level feature is enabled for this segment. This is to
3410 // prevent the possibility that we end up unable to pick any mode.
3411 } else if (!vp9_segfeature_active(seg, segment_id,
3412 SEG_LVL_REF_FRAME)) {
3413 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3414 // unless ARNR filtering is enabled in which case we want
3415 // an unfiltered alternative. We allow near/nearest as well
3416 // because they may result in zero-zero MVs but be cheaper.
3417 if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
3418 if ((this_mode != ZEROMV &&
3419 !(this_mode == NEARMV &&
3420 frame_mv[NEARMV][ALTREF_FRAME].as_int == 0) &&
3421 !(this_mode == NEARESTMV &&
3422 frame_mv[NEARESTMV][ALTREF_FRAME].as_int == 0)) ||
3423 ref_frame != ALTREF_FRAME) {
3424 continue;
3425 }
3426 }
3427 }
3428 // TODO(JBB): This is to make up for the fact that we don't have SAD
3429 // functions that work when the block size reads outside the UMV border.
3430 // We should fix this either by making the motion search just work on
3431 // a representative block at the boundary (first), or by implementing a
3432 // function that computes SADs for reads inside the border.
3433 if (((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) &&
3434 this_mode == NEWMV) {
3435 continue;
3436 }
3438 #ifdef MODE_TEST_HIT_STATS
3439 // TEST/DEBUG CODE
3440 // Keep a record of the number of test hits at each size
3441 cpi->mode_test_hits[bsize]++;
3442 #endif
3445 if (ref_frame == INTRA_FRAME) {
3446 TX_SIZE uv_tx;
3447 // Disable intra modes other than DC_PRED for blocks with low variance
3448 // Threshold for intra skipping based on source variance
3449 // TODO(debargha): Specialize the threshold for super block sizes
3450 static const unsigned int skip_intra_var_thresh[BLOCK_SIZES] = {
3451 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
3452 };
3453 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
3454 this_mode != DC_PRED &&
3455 x->source_variance < skip_intra_var_thresh[mbmi->sb_type])
3456 continue;
3457 // Only search the oblique modes if the best so far is
3458 // one of the neighboring directional modes
3459 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3460 (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
3461 if (vp9_mode_order[best_mode_index].ref_frame > INTRA_FRAME)
3462 continue;
3463 }
3464 mbmi->mode = this_mode;
3465 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
3466 if (conditional_skipintra(mbmi->mode, best_intra_mode))
3467 continue;
3468 }
3470 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
3471 bsize, tx_cache, best_rd);
3473 if (rate_y == INT_MAX)
3474 continue;
3476 uv_tx = MIN(mbmi->tx_size, max_uv_txsize_lookup[bsize]);
3477 if (rate_uv_intra[uv_tx] == INT_MAX) {
3478 choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[uv_tx],
3479 &rate_uv_tokenonly[uv_tx],
3480 &dist_uv[uv_tx], &skip_uv[uv_tx],
3481 &mode_uv[uv_tx]);
3482 }
3484 rate_uv = rate_uv_tokenonly[uv_tx];
3485 distortion_uv = dist_uv[uv_tx];
3486 skippable = skippable && skip_uv[uv_tx];
3487 mbmi->uv_mode = mode_uv[uv_tx];
3489 rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
3490 if (this_mode != DC_PRED && this_mode != TM_PRED)
3491 rate2 += intra_cost_penalty;
3492 distortion2 = distortion_y + distortion_uv;
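// Note: for intra, rate2 is the luma token rate plus the mode signaling
// cost plus the chroma intra rate; non-DC/TM modes additionally pay
// intra_cost_penalty (initialized elsewhere in this file as 20x the DC
// quantizer step), biasing the search toward DC/TM at high Q.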
3493 } else {
3494 mbmi->mode = this_mode;
3495 compmode_cost = vp9_cost_bit(comp_mode_p, second_ref_frame > INTRA_FRAME);
3496 this_rd = handle_inter_mode(cpi, x, tile, bsize,
3497 tx_cache,
3498 &rate2, &distortion2, &skippable,
3499 &rate_y, &distortion_y,
3500 &rate_uv, &distortion_uv,
3501 &mode_excluded, &disable_skip,
3502 &tmp_best_filter, frame_mv,
3503 mi_row, mi_col,
3504 single_newmv, &total_sse, best_rd);
3505 if (this_rd == INT64_MAX)
3506 continue;
3507 }
3509 if (cm->comp_pred_mode == HYBRID_PREDICTION) {
3510 rate2 += compmode_cost;
3511 }
3513 // Estimate the reference frame signaling cost and add it
3514 // to the rolling cost variable.
3515 if (second_ref_frame > INTRA_FRAME) {
3516 rate2 += ref_costs_comp[ref_frame];
3517 } else {
3518 rate2 += ref_costs_single[ref_frame];
3519 }
3521 if (!disable_skip) {
3522 // Test for the condition where the skip block will be activated
3523 // because there are no non-zero coefficients, and make any
3524 // necessary adjustment for rate. Ignore if skip is coded at the
3525 // segment level, as the cost won't have been added in.
3526 // Is MB-level skip allowed (i.e. not coded at segment level)?
3527 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
3528 SEG_LVL_SKIP);
3530 if (skippable) {
3531 // Back out the coefficient coding costs
3532 rate2 -= (rate_y + rate_uv);
3533 // for best yrd calculation
3534 rate_uv = 0;
3536 if (mb_skip_allowed) {
3537 int prob_skip_cost;
3539 // Cost the skip mb case
3540 vp9_prob skip_prob =
3541 vp9_get_pred_prob_mbskip(cm, xd);
3543 if (skip_prob) {
3544 prob_skip_cost = vp9_cost_bit(skip_prob, 1);
3545 rate2 += prob_skip_cost;
3546 }
3547 }
3548 } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
3549 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
3550 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
3551 // Add in the cost of the no skip flag.
3552 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
3553 0);
3554 rate2 += prob_skip_cost;
3555 } else {
3556 // FIXME(rbultje) make this work for splitmv also
3557 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
3558 1);
3559 rate2 += prob_skip_cost;
3560 distortion2 = total_sse;
3561 assert(total_sse >= 0);
3562 rate2 -= (rate_y + rate_uv);
3563 rate_y = 0;
3564 rate_uv = 0;
3565 this_skip2 = 1;
3566 }
3567 } else if (mb_skip_allowed) {
3568 // Add in the cost of the no skip flag.
3569 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
3570 0);
3571 rate2 += prob_skip_cost;
3572 }
3574 // Calculate the final RD estimate for this mode.
3575 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3576 }
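// Note on the skip branches above: if the block is already skippable the
// coefficient costs are backed out and only the skip=1 flag is paid; for a
// non-skippable inter block, coding the coefficients (rate_y + rate_uv at
// distortion2) is compared against forcing skip (no coefficient rate, but
// distortion rises to total_sse) and the cheaper alternative is taken;
// otherwise (intra or lossless) only the skip=0 flag cost is added.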
3578 // Keep record of best intra rd
3579 if (!is_inter_block(&xd->mi_8x8[0]->mbmi) &&
3580 this_rd < best_intra_rd) {
3581 best_intra_rd = this_rd;
3582 best_intra_mode = xd->mi_8x8[0]->mbmi.mode;
3583 }
3585 // Keep record of best inter rd with single reference
3586 if (is_inter_block(&xd->mi_8x8[0]->mbmi) &&
3587 !has_second_ref(&xd->mi_8x8[0]->mbmi) &&
3588 !mode_excluded && this_rd < best_inter_rd) {
3589 best_inter_rd = this_rd;
3590 best_inter_ref_frame = ref_frame;
3591 }
3593 if (!disable_skip && ref_frame == INTRA_FRAME) {
3594 for (i = 0; i < NB_PREDICTION_TYPES; ++i)
3595 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
3596 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3597 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
3598 }
3600 // Store the respective mode distortions for later use.
3601 if (mode_distortions[this_mode] == -1
3602 || distortion2 < mode_distortions[this_mode]) {
3603 mode_distortions[this_mode] = distortion2;
3604 }
3605 if (frame_distortions[ref_frame] == -1
3606 || distortion2 < frame_distortions[ref_frame]) {
3607 frame_distortions[ref_frame] = distortion2;
3608 }
3610 // Did this mode help, i.e. is it the new best mode so far?
3611 if (this_rd < best_rd || x->skip) {
3612 int max_plane = MAX_MB_PLANE;
3613 if (!mode_excluded) {
3614 // Note index of best mode so far
3615 best_mode_index = mode_index;
3617 if (ref_frame == INTRA_FRAME) {
3618 /* required for left and above block mv */
3619 mbmi->mv[0].as_int = 0;
3620 max_plane = 1;
3621 }
3623 *returnrate = rate2;
3624 *returndistortion = distortion2;
3625 best_rd = this_rd;
3626 best_mbmode = *mbmi;
3627 best_skip2 = this_skip2;
3628 if (!x->select_txfm_size)
3629 swap_block_ptr(x, ctx, max_plane);
3630 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
3631 sizeof(uint8_t) * ctx->num_4x4_blk);
3633 // TODO(debargha): enhance this test with a better distortion prediction
3634 // based on qp, activity mask and history
3635 if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
3636 (mode_index > MIN_EARLY_TERM_INDEX)) {
3637 const int qstep = xd->plane[0].dequant[1];
3638 // TODO(debargha): Enhance this by specializing for each mode_index
3639 int scale = 4;
3640 if (x->source_variance < UINT_MAX) {
3641 const int var_adjust = (x->source_variance < 16);
3642 scale -= var_adjust;
3643 }
3644 if (ref_frame > INTRA_FRAME &&
3645 distortion2 * scale < qstep * qstep) {
3646 early_term = 1;
3647 }
3648 }
3649 }
3650 }
3652 /* keep record of best compound/single-only prediction */
3653 if (!disable_skip && ref_frame != INTRA_FRAME) {
3654 int single_rd, hybrid_rd, single_rate, hybrid_rate;
3656 if (cm->comp_pred_mode == HYBRID_PREDICTION) {
3657 single_rate = rate2 - compmode_cost;
3658 hybrid_rate = rate2;
3659 } else {
3660 single_rate = rate2;
3661 hybrid_rate = rate2 + compmode_cost;
3662 }
3664 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3665 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3667 if (second_ref_frame <= INTRA_FRAME &&
3668 single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
3669 best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
3670 } else if (second_ref_frame > INTRA_FRAME &&
3671 single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
3672 best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
3673 }
3674 if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
3675 best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
3676 }
3678 /* keep record of best filter type */
3679 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
3680 cm->mcomp_filter_type != BILINEAR) {
3681 int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
3682 SWITCHABLE_FILTERS : cm->mcomp_filter_type];
3683 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3684 int64_t adj_rd;
3685 // In cases of poor prediction, filter_cache[] can contain really big
3686 // values, which actually are bigger than this_rd itself. This can
3687 // cause negative best_filter_rd[] values, which is obviously silly.
3688 // Therefore, if filter_cache < ref, we do an adjusted calculation.
3689 if (cpi->rd_filter_cache[i] >= ref) {
3690 adj_rd = this_rd + cpi->rd_filter_cache[i] - ref;
3691 } else {
3692 // FIXME(rbultje) do this for comppred also
3693 //
3694 // To prevent out-of-range computation in
3695 // adj_rd = cpi->rd_filter_cache[i] * this_rd / ref
3696 // cpi->rd_filter_cache[i] / ref is converted to a 256 based ratio.
3697 int tmp = cpi->rd_filter_cache[i] * 256 / ref;
3698 adj_rd = (this_rd * tmp) >> 8;
3699 }
3700 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
3701 }
3702 }
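// Worked example of the 256-based ratio above (numbers illustrative): with
// rd_filter_cache[i] = 50 and ref = 100, tmp = 50 * 256 / 100 = 128 and
// adj_rd = (this_rd * 128) >> 8 = this_rd / 2, i.e. this_rd scaled by
// cache/ref without risking int64 overflow in the direct product.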
3704 /* keep record of best txfm size */
3705 if (bsize < BLOCK_32X32) {
3706 if (bsize < BLOCK_16X16)
3707 tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
3709 tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
3710 }
3711 if (!mode_excluded && this_rd != INT64_MAX) {
3712 for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
3713 int64_t adj_rd = INT64_MAX;
3714 adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
3716 if (adj_rd < best_tx_rd[i])
3717 best_tx_rd[i] = adj_rd;
3718 }
3719 }
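// Note: tx_cache[i] is this mode's rd cost under tx mode i, so the
// adjustment this_rd + tx_cache[i] - tx_cache[cm->tx_mode] estimates what
// this mode would have cost had the frame used tx mode i instead of the
// current cm->tx_mode; best_tx_rd[] tracks the minimum of that across modes.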
3721 if (early_term)
3722 break;
3724 if (x->skip && !comp_pred)
3725 break;
3726 }
3728 if (best_rd >= best_rd_so_far)
3729 return INT64_MAX;
3731 // If we used an estimate for the uv intra rd in the loop above...
3732 if (cpi->sf.use_uv_intra_rd_estimate) {
3733 // Do Intra UV best rd mode selection if best mode choice above was intra.
3734 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) {
3735 TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
3736 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
3737 &rate_uv_tokenonly[uv_tx_size],
3738 &dist_uv[uv_tx_size],
3739 &skip_uv[uv_tx_size],
3740 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
3741 }
3742 }
3744 // If we are using reference masking and the set mask flag is set then
3745 // create the reference frame mask.
3746 if (cpi->sf.reference_masking && cpi->set_ref_frame_mask)
3747 cpi->ref_frame_mask = ~(1 << vp9_mode_order[best_mode_index].ref_frame);
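// Note: ~(1 << best_ref) sets every bit except the winner's; since the
// masking test at the top of the mode loop skips any reference whose bit
// is set, later searches with reference_masking enabled consider only the
// reference frame that won here.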
3749 // Flag all modes that have a distortion that's > 2x the best we found
3750 // at this level.
3751 for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) {
3752 if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV)
3753 continue;
3755 if (mode_distortions[mode_index] > 2 * *returndistortion) {
3756 ctx->modes_with_high_error |= (1 << mode_index);
3757 }
3758 }
3760 // Flag all ref frames that have a distortion that's > 2x the best we
3761 // found at this level.
3762 for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3763 if (frame_distortions[ref_frame] > 2 * *returndistortion) {
3764 ctx->frames_with_high_error |= (1 << ref_frame);
3765 }
3766 }
3768 assert((cm->mcomp_filter_type == SWITCHABLE) ||
3769 (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
3770 (best_mbmode.ref_frame[0] == INTRA_FRAME));
3772 // Updating rd_thresh_freq_fact[] here means that the different
3773 // partition/block sizes are handled independently based on the best
3774 // choice for the current partition. It may well be better to keep a scaled
3775 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
3776 // combination that wins out.
3777 if (cpi->sf.adaptive_rd_thresh) {
3778 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3779 if (mode_index == best_mode_index) {
3780 cpi->rd_thresh_freq_fact[bsize][mode_index] -=
3781 (cpi->rd_thresh_freq_fact[bsize][mode_index] >> 3);
3782 } else {
3783 cpi->rd_thresh_freq_fact[bsize][mode_index] += RD_THRESH_INC;
3784 if (cpi->rd_thresh_freq_fact[bsize][mode_index] >
3785 (cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT)) {
3786 cpi->rd_thresh_freq_fact[bsize][mode_index] =
3787 cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT;
3788 }
3789 }
3790 }
3791 }
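// Note: the winner's freq fact decays by 1/8 of itself, lowering its
// effective threshold (base * fact >> 5) so it is tried more readily next
// time, while every other mode's fact grows by RD_THRESH_INC up to the
// adaptive_rd_thresh * RD_THRESH_MAX_FACT cap.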
3793 // macroblock modes
3794 *mbmi = best_mbmode;
3795 x->skip |= best_skip2;
3797 for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
3798 if (best_pred_rd[i] == INT64_MAX)
3799 best_pred_diff[i] = INT_MIN;
3800 else
3801 best_pred_diff[i] = best_rd - best_pred_rd[i];
3802 }
3804 if (!x->skip) {
3805 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3806 if (best_filter_rd[i] == INT64_MAX)
3807 best_filter_diff[i] = 0;
3808 else
3809 best_filter_diff[i] = best_rd - best_filter_rd[i];
3810 }
3811 if (cm->mcomp_filter_type == SWITCHABLE)
3812 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3813 } else {
3814 vp9_zero(best_filter_diff);
3815 }
3817 if (!x->skip) {
3818 for (i = 0; i < TX_MODES; i++) {
3819 if (best_tx_rd[i] == INT64_MAX)
3820 best_tx_diff[i] = 0;
3821 else
3822 best_tx_diff[i] = best_rd - best_tx_rd[i];
3823 }
3824 } else {
3825 vp9_zero(best_tx_diff);
3826 }
3828 set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
3829 scale_factor);
3830 store_coding_context(x, ctx, best_mode_index,
3831 &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
3832 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
3833 mbmi->ref_frame[1]][0],
3834 best_pred_diff, best_tx_diff, best_filter_diff);
3836 return best_rd;
3837 }
3840 int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
3841 const TileInfo *const tile,
3842 int mi_row, int mi_col,
3843 int *returnrate,
3844 int64_t *returndistortion,
3845 BLOCK_SIZE bsize,
3846 PICK_MODE_CONTEXT *ctx,
3847 int64_t best_rd_so_far) {
3848 VP9_COMMON *cm = &cpi->common;
3849 MACROBLOCKD *xd = &x->e_mbd;
3850 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
3851 const struct segmentation *seg = &cm->seg;
3852 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
3853 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3854 unsigned char segment_id = mbmi->segment_id;
3855 int comp_pred, i;
3856 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3857 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3858 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3859 VP9_ALT_FLAG };
3860 int idx_list[4] = {0,
3861 cpi->lst_fb_idx,
3862 cpi->gld_fb_idx,
3863 cpi->alt_fb_idx};
3864 int64_t best_rd = best_rd_so_far;
3865 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
3866 int64_t best_tx_rd[TX_MODES];
3867 int64_t best_tx_diff[TX_MODES];
3868 int64_t best_pred_diff[NB_PREDICTION_TYPES];
3869 int64_t best_pred_rd[NB_PREDICTION_TYPES];
3870 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3871 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3872 MB_MODE_INFO best_mbmode = { 0 };
3873 int mode_index, best_mode_index = 0;
3874 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3875 vp9_prob comp_mode_p;
3876 int64_t best_inter_rd = INT64_MAX;
3877 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3878 INTERPOLATION_TYPE tmp_best_filter = SWITCHABLE;
3879 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3880 int64_t dist_uv[TX_SIZES];
3881 int skip_uv[TX_SIZES];
3882 MB_PREDICTION_MODE mode_uv[TX_SIZES] = { 0 };
3883 struct scale_factors scale_factor[4];
3884 unsigned int ref_frame_mask = 0;
3885 unsigned int mode_mask = 0;
3886 int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
3887 cpi->common.y_dc_delta_q);
3888 int_mv seg_mvs[4][MAX_REF_FRAMES];
3889 b_mode_info best_bmodes[4];
3890 int best_skip2 = 0;
3892 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3893 vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
3895 for (i = 0; i < 4; i++) {
3896 int j;
3897 for (j = 0; j < MAX_REF_FRAMES; j++)
3898 seg_mvs[i][j].as_int = INVALID_MV;
3899 }
3901 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
3902 &comp_mode_p);
3904 for (i = 0; i < NB_PREDICTION_TYPES; ++i)
3905 best_pred_rd[i] = INT64_MAX;
3906 for (i = 0; i < TX_MODES; i++)
3907 best_tx_rd[i] = INT64_MAX;
3908 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3909 best_filter_rd[i] = INT64_MAX;
3910 for (i = 0; i < TX_SIZES; i++)
3911 rate_uv_intra[i] = INT_MAX;
3913 *returnrate = INT_MAX;
3915 // Create a mask set to 1 for each reference frame used by a smaller
3916 // resolution.
3917 if (cpi->sf.use_avoid_tested_higherror) {
3918 ref_frame_mask = 0;
3919 mode_mask = 0;
3920 ref_frame_mask = ~ref_frame_mask;
3921 mode_mask = ~mode_mask;
3922 }
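// Note: the zero-then-complement sequence above is just an all-ones
// initializer; in this sub8x8 path no reference frame or mode is actually
// filtered out by these masks.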
3924 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3925 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3926 setup_buffer_inter(cpi, x, tile, idx_list[ref_frame], ref_frame,
3927 block_size, mi_row, mi_col,
3928 frame_mv[NEARESTMV], frame_mv[NEARMV],
3929 yv12_mb, scale_factor);
3930 }
3931 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3932 frame_mv[ZEROMV][ref_frame].as_int = 0;
3933 }
3935 for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
3936 int mode_excluded = 0;
3937 int64_t this_rd = INT64_MAX;
3938 int disable_skip = 0;
3939 int compmode_cost = 0;
3940 int rate2 = 0, rate_y = 0, rate_uv = 0;
3941 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3942 int skippable = 0;
3943 int64_t tx_cache[TX_MODES];
3944 int i;
3945 int this_skip2 = 0;
3946 int64_t total_sse = INT_MAX;
3947 int early_term = 0;
3949 for (i = 0; i < TX_MODES; ++i)
3950 tx_cache[i] = INT64_MAX;
3952 x->skip = 0;
3953 ref_frame = vp9_ref_order[mode_index].ref_frame;
3954 second_ref_frame = vp9_ref_order[mode_index].second_ref_frame;
3956 // Look at the reference frame of the best mode so far and set the
3957 // skip mask to look at a subset of the remaining modes.
3958 if (mode_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) {
3959 if (mode_index == 3) {
3960 switch (vp9_ref_order[best_mode_index].ref_frame) {
3961 case INTRA_FRAME:
3962 cpi->mode_skip_mask = 0;
3963 break;
3964 case LAST_FRAME:
3965 cpi->mode_skip_mask = 0x0010;
3966 break;
3967 case GOLDEN_FRAME:
3968 cpi->mode_skip_mask = 0x0008;
3969 break;
3970 case ALTREF_FRAME:
3971 cpi->mode_skip_mask = 0x0000;
3972 break;
3973 case NONE:
3974 case MAX_REF_FRAMES:
3975 assert(!"Invalid Reference frame");
3976 }
3977 }
3978 if (cpi->mode_skip_mask & ((int64_t)1 << mode_index))
3979 continue;
3980 }
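// Note: these constants are bit masks over vp9_ref_order[] indices (each
// set bit skips the entry at that index); given that table's layout they
// appear to prune the compound pair that does not involve the reference
// of the best mode found so far.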
3982 // Skip if the current reference frame has been masked off
3983 if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask &&
3984 (cpi->ref_frame_mask & (1 << ref_frame)))
3985 continue;
3987 // Test best rd so far against threshold for trying this mode.
3988 if ((best_rd <
3989 ((int64_t)cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] *
3990 cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 5)) ||
3991 cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX)
3992 continue;
3994 // Do not allow compound prediction if the segment level reference
3995 // frame feature is in use as in this case there can only be one reference.
3996 if ((second_ref_frame > INTRA_FRAME) &&
3997 vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3998 continue;
4000 mbmi->ref_frame[0] = ref_frame;
4001 mbmi->ref_frame[1] = second_ref_frame;
4003 if (!(ref_frame == INTRA_FRAME
4004 || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
4005 continue;
4006 }
4007 if (!(second_ref_frame == NONE
4008 || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
4009 continue;
4010 }
4012 comp_pred = second_ref_frame > INTRA_FRAME;
4013 if (comp_pred) {
4014 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
4015 if (vp9_ref_order[best_mode_index].ref_frame == INTRA_FRAME)
4016 continue;
4017 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
4018 if (ref_frame != best_inter_ref_frame &&
4019 second_ref_frame != best_inter_ref_frame)
4020 continue;
4021 }
4023 // TODO(jingning, jkoleszar): scaling reference frame not supported for
4024 // sub8x8 blocks.
4025 if (ref_frame > 0 &&
4026 vp9_is_scaled(scale_factor[ref_frame].sfc))
4027 continue;
4029 if (second_ref_frame > 0 &&
4030 vp9_is_scaled(scale_factor[second_ref_frame].sfc))
4031 continue;
4033 set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
4034 mbmi->uv_mode = DC_PRED;
4036 // Evaluate all sub-pel filters irrespective of whether we can use
4037 // them for this frame.
4038 mbmi->interp_filter = cm->mcomp_filter_type;
4039 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
4041 if (comp_pred) {
4042 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
4043 continue;
4044 set_scale_factors(xd, ref_frame, second_ref_frame, scale_factor);
4046 mode_excluded = mode_excluded
4047 ? mode_excluded
4048 : cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
4049 } else {
4050 if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
4051 mode_excluded =
4052 mode_excluded ?
4053 mode_excluded : cm->comp_pred_mode == COMP_PREDICTION_ONLY;
4054 }
4055 }
4057 // Select prediction reference frames.
4058 for (i = 0; i < MAX_MB_PLANE; i++) {
4059 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
4060 if (comp_pred)
4061 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
4062 }
4064 // If the segment reference frame feature is enabled, then skip this
4065 // mode if the current ref frame is not allowed for this segment.
4066 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
4067 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
4068 (int)ref_frame) {
4069 continue;
4070 // If the segment skip feature is enabled, then skip this mode
4071 // unless it uses an intra reference.
4072 } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
4073 ref_frame != INTRA_FRAME) {
4074 continue;
4075 // Disable this drop out case if the ref frame
4076 // segment level feature is enabled for this segment. This is to
4077 // prevent the possibility that we end up unable to pick any mode.
4078 } else if (!vp9_segfeature_active(seg, segment_id,
4079 SEG_LVL_REF_FRAME)) {
4080 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
4081 // unless ARNR filtering is enabled in which case we want
4082 // an unfiltered alternative. We allow near/nearest as well
4083 // because they may result in zero-zero MVs but be cheaper.
4084 if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
4085 continue;
4086 }
4088 #ifdef MODE_TEST_HIT_STATS
4089 // TEST/DEBUG CODE
4090 // Keep a record of the number of test hits at each size
4091 cpi->mode_test_hits[bsize]++;
4092 #endif
4094 if (ref_frame == INTRA_FRAME) {
4095 int rate;
4096 mbmi->tx_size = TX_4X4;
4097 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
4098 &distortion_y, best_rd) >= best_rd)
4099 continue;
4100 rate2 += rate;
4101 rate2 += intra_cost_penalty;
4102 distortion2 += distortion_y;
4104 if (rate_uv_intra[TX_4X4] == INT_MAX) {
4105 choose_intra_uv_mode(cpi, ctx, bsize, &rate_uv_intra[TX_4X4],
4106 &rate_uv_tokenonly[TX_4X4],
4107 &dist_uv[TX_4X4], &skip_uv[TX_4X4],
4108 &mode_uv[TX_4X4]);
4109 }
4110 rate2 += rate_uv_intra[TX_4X4];
4111 rate_uv = rate_uv_tokenonly[TX_4X4];
4112 distortion2 += dist_uv[TX_4X4];
4113 distortion_uv = dist_uv[TX_4X4];
4114 mbmi->uv_mode = mode_uv[TX_4X4];
4115 tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4116 for (i = 0; i < TX_MODES; ++i)
4117 tx_cache[i] = tx_cache[ONLY_4X4];
4118 } else {
4119 int rate;
4120 int64_t distortion;
4121 int64_t this_rd_thresh;
4122 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
4123 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
4124 int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
4125 int tmp_best_skippable = 0;
4126 int switchable_filter_index;
4127 int_mv *second_ref = comp_pred ?
4128 &mbmi->ref_mvs[second_ref_frame][0] : NULL;
4129 b_mode_info tmp_best_bmodes[16];
4130 MB_MODE_INFO tmp_best_mbmode;
4131 BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
4132 int pred_exists = 0;
4133 int uv_skippable;
4135 this_rd_thresh = (ref_frame == LAST_FRAME) ?
4136 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_LAST] :
4137 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_ALTR];
4138 this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
4139 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh;
4140 xd->mi_8x8[0]->mbmi.tx_size = TX_4X4;
4142 cpi->rd_filter_cache[SWITCHABLE_FILTERS] = INT64_MAX;
4143 if (cm->mcomp_filter_type != BILINEAR) {
4144 tmp_best_filter = EIGHTTAP;
4145 if (x->source_variance <
4146 cpi->sf.disable_filter_search_var_thresh) {
4147 tmp_best_filter = EIGHTTAP;
4148 vp9_zero(cpi->rd_filter_cache);
4149 } else {
4150 for (switchable_filter_index = 0;
4151 switchable_filter_index < SWITCHABLE_FILTERS;
4152 ++switchable_filter_index) {
4153 int newbest, rs;
4154 int64_t rs_rd;
4155 mbmi->interp_filter = switchable_filter_index;
4156 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
4158 tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
4159 &mbmi->ref_mvs[ref_frame][0],
4160 second_ref,
4161 best_yrd,
4162 &rate, &rate_y, &distortion,
4163 &skippable, &total_sse,
4164 (int)this_rd_thresh, seg_mvs,
4165 bsi, switchable_filter_index,
4166 mi_row, mi_col);
4168 if (tmp_rd == INT64_MAX)
4169 continue;
4170 cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
4171 rs = get_switchable_rate(x);
4172 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
4173 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
4174 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
4175 tmp_rd + rs_rd);
4176 if (cm->mcomp_filter_type == SWITCHABLE)
4177 tmp_rd += rs_rd;
4179 newbest = (tmp_rd < tmp_best_rd);
4180 if (newbest) {
4181 tmp_best_filter = mbmi->interp_filter;
4182 tmp_best_rd = tmp_rd;
4183 }
4184 if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
4185 (mbmi->interp_filter == cm->mcomp_filter_type &&
4186 cm->mcomp_filter_type != SWITCHABLE)) {
4187 tmp_best_rdu = tmp_rd;
4188 tmp_best_rate = rate;
4189 tmp_best_ratey = rate_y;
4190 tmp_best_distortion = distortion;
4191 tmp_best_sse = total_sse;
4192 tmp_best_skippable = skippable;
4193 tmp_best_mbmode = *mbmi;
4194 for (i = 0; i < 4; i++) {
4195 tmp_best_bmodes[i] = xd->mi_8x8[0]->bmi[i];
4196 x->zcoeff_blk[TX_4X4][i] = !xd->plane[0].eobs[i];
4197 }
4198 pred_exists = 1;
4199 if (switchable_filter_index == 0 &&
4200 cpi->sf.use_rd_breakout &&
4201 best_rd < INT64_MAX) {
4202 if (tmp_best_rdu / 2 > best_rd) {
4203 // skip searching the other filters if the first is
4204 // already substantially larger than the best so far
4205 tmp_best_filter = mbmi->interp_filter;
4206 tmp_best_rdu = INT64_MAX;
4207 break;
4208 }
4209 }
4210 }
4211 } // switchable_filter_index loop
4212 }
4213 }
4215 if (tmp_best_rdu == INT64_MAX)
4216 continue;
4218 mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
4219 tmp_best_filter : cm->mcomp_filter_type);
4220 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
4221 if (!pred_exists) {
4222 // Handles the special case when a filter that is not in the
4223 // switchable list (bilinear, 6-tap) is indicated at the frame level
4224 tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
4225 &mbmi->ref_mvs[ref_frame][0],
4226 second_ref,
4227 best_yrd,
4228 &rate, &rate_y, &distortion,
4229 &skippable, &total_sse,
4230 (int)this_rd_thresh, seg_mvs,
4231 bsi, 0,
4232 mi_row, mi_col);
4233 if (tmp_rd == INT64_MAX)
4234 continue;
4235 } else {
4236 if (cpi->common.mcomp_filter_type == SWITCHABLE) {
4237 int rs = get_switchable_rate(x);
4238 tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
4239 }
4240 tmp_rd = tmp_best_rdu;
4241 total_sse = tmp_best_sse;
4242 rate = tmp_best_rate;
4243 rate_y = tmp_best_ratey;
4244 distortion = tmp_best_distortion;
4245 skippable = tmp_best_skippable;
4246 *mbmi = tmp_best_mbmode;
4247 for (i = 0; i < 4; i++)
4248 xd->mi_8x8[0]->bmi[i] = tmp_best_bmodes[i];
4249 }
4251 rate2 += rate;
4252 distortion2 += distortion;
4254 if (cpi->common.mcomp_filter_type == SWITCHABLE)
4255 rate2 += get_switchable_rate(x);
4257 if (!mode_excluded) {
4258 if (comp_pred)
4259 mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
4260 else
4261 mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
4262 }
4263 compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
4265 tmp_best_rdu = best_rd -
4266 MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
4267 RDCOST(x->rdmult, x->rddiv, 0, total_sse));
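// Note: tmp_best_rdu is the rd budget left for chroma: best_rd minus the
// cheaper of coding the luma result or skipping it outright. If that
// budget is already exhausted there is no point evaluating UV at all.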
4269 if (tmp_best_rdu > 0) {
4270 // If even the 'Y' rd value of split is higher than the best so far,
4271 // then don't bother looking at UV.
4272 vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
4273 BLOCK_8X8);
4274 super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
4275 &uv_sse, BLOCK_8X8, tmp_best_rdu);
4276 if (rate_uv == INT_MAX)
4277 continue;
4278 rate2 += rate_uv;
4279 distortion2 += distortion_uv;
4280 skippable = skippable && uv_skippable;
4281 total_sse += uv_sse;
4283 tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4284 for (i = 0; i < TX_MODES; ++i)
4285 tx_cache[i] = tx_cache[ONLY_4X4];
4286 }
4287 }
4289 if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
4290 rate2 += compmode_cost;
4291 }
4293 // Estimate the reference frame signaling cost and add it
4294 // to the rolling cost variable.
4295 if (second_ref_frame > INTRA_FRAME) {
4296 rate2 += ref_costs_comp[ref_frame];
4297 } else {
4298 rate2 += ref_costs_single[ref_frame];
4299 }
4301 if (!disable_skip) {
4302 // Test for the condition where the skip block will be activated
4303 // because there are no non-zero coefficients, and make any
4304 // necessary adjustment for rate. Ignore if skip is coded at the
4305 // segment level, as the cost won't have been added in.
4306 // Is MB-level skip allowed (i.e. not coded at segment level)?
4307 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
4308 SEG_LVL_SKIP);
4310 if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
4311 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
4312 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
4313 // Add in the cost of the no skip flag.
4314 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
4315 0);
4316 rate2 += prob_skip_cost;
4317 } else {
4318 // FIXME(rbultje) make this work for splitmv also
4319 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
4320 1);
4321 rate2 += prob_skip_cost;
4322 distortion2 = total_sse;
4323 assert(total_sse >= 0);
4324 rate2 -= (rate_y + rate_uv);
4325 rate_y = 0;
4326 rate_uv = 0;
4327 this_skip2 = 1;
4328 }
4329 } else if (mb_skip_allowed) {
4330 // Add in the cost of the no skip flag.
4331 int prob_skip_cost = vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd),
4332 0);
4333 rate2 += prob_skip_cost;
4334 }
4336 // Calculate the final RD estimate for this mode.
4337 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4338 }
4340 // Keep record of best inter rd with single reference
4341 if (xd->mi_8x8[0]->mbmi.ref_frame[0] > INTRA_FRAME &&
4342 xd->mi_8x8[0]->mbmi.ref_frame[1] == NONE &&
4343 !mode_excluded &&
4344 this_rd < best_inter_rd) {
4345 best_inter_rd = this_rd;
4346 best_inter_ref_frame = ref_frame;
4347 }
4349 if (!disable_skip && ref_frame == INTRA_FRAME) {
4350 for (i = 0; i < NB_PREDICTION_TYPES; ++i)
4351 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
4352 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
4353 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
4354 }
4356 // Did this mode help, i.e. is it the new best mode so far?
4357 if (this_rd < best_rd || x->skip) {
4358 if (!mode_excluded) {
4359 int max_plane = MAX_MB_PLANE;
4360 // Note index of best mode so far
4361 best_mode_index = mode_index;
4363 if (ref_frame == INTRA_FRAME) {
4364 /* required for left and above block mv */
4365 mbmi->mv[0].as_int = 0;
4366 max_plane = 1;
4367 }
4369 *returnrate = rate2;
4370 *returndistortion = distortion2;
4371 best_rd = this_rd;
4372 best_yrd = best_rd -
4373 RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
4374 best_mbmode = *mbmi;
4375 best_skip2 = this_skip2;
4376 if (!x->select_txfm_size)
4377 swap_block_ptr(x, ctx, max_plane);
4378 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
4379 sizeof(uint8_t) * ctx->num_4x4_blk);
4381 for (i = 0; i < 4; i++)
4382 best_bmodes[i] = xd->mi_8x8[0]->bmi[i];
4384 // TODO(debargha): enhance this test with a better distortion prediction
4385 // based on qp, activity mask and history
4386 if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
4387 (mode_index > MIN_EARLY_TERM_INDEX)) {
4388 const int qstep = xd->plane[0].dequant[1];
4389 // TODO(debargha): Enhance this by specializing for each mode_index
4390 int scale = 4;
4391 if (x->source_variance < UINT_MAX) {
4392 const int var_adjust = (x->source_variance < 16);
4393 scale -= var_adjust;
4394 }
4395 if (ref_frame > INTRA_FRAME &&
4396 distortion2 * scale < qstep * qstep) {
4397 early_term = 1;
4398 }
4399 }
4400 }
4401 }
4403 /* keep record of best compound/single-only prediction */
4404 if (!disable_skip && ref_frame != INTRA_FRAME) {
4405 int single_rd, hybrid_rd, single_rate, hybrid_rate;
4407 if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
4408 single_rate = rate2 - compmode_cost;
4409 hybrid_rate = rate2;
4410 } else {
4411 single_rate = rate2;
4412 hybrid_rate = rate2 + compmode_cost;
4413 }
4415 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
4416 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
4418 if (second_ref_frame <= INTRA_FRAME &&
4419 single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
4420 best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
4421 } else if (second_ref_frame > INTRA_FRAME &&
4422 single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
4423 best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
4424 }
4425 if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
4426 best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
4427 }
4429 /* keep record of best filter type */
4430 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
4431 cm->mcomp_filter_type != BILINEAR) {
4432 int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
4433 SWITCHABLE_FILTERS : cm->mcomp_filter_type];
4434 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4435 int64_t adj_rd;
4436 // In cases of poor prediction, filter_cache[] can contain really big
4437 // values, which actually are bigger than this_rd itself. This can
4438 // cause negative best_filter_rd[] values, which is obviously silly.
4439 // Therefore, if filter_cache < ref, we do an adjusted calculation.
4440 if (cpi->rd_filter_cache[i] >= ref)
4441 adj_rd = this_rd + cpi->rd_filter_cache[i] - ref;
4442 else // FIXME(rbultje) do this for comppred also
4443 adj_rd = this_rd - (ref - cpi->rd_filter_cache[i]) * this_rd / ref;
4444 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
4445 }
4446 }
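// Note: the adjusted value above, this_rd - (ref - cache) * this_rd / ref,
// is algebraically this_rd * cache / ref (modulo integer rounding), the
// same scaling that the non-sub8x8 path computes via the 256-based ratio.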
4448 /* keep record of best txfm size */
4449 if (bsize < BLOCK_32X32) {
4450 if (bsize < BLOCK_16X16) {
4451 tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4];
4452 tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
4453 }
4454 tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
4455 }
4456 if (!mode_excluded && this_rd != INT64_MAX) {
4457 for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
4458 int64_t adj_rd = INT64_MAX;
4459 if (ref_frame > INTRA_FRAME)
4460 adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
4461 else
4462 adj_rd = this_rd;
4464 if (adj_rd < best_tx_rd[i])
4465 best_tx_rd[i] = adj_rd;
4466 }
4467 }
4469 if (early_term)
4470 break;
4472 if (x->skip && !comp_pred)
4473 break;
4474 }
4476 if (best_rd >= best_rd_so_far)
4477 return INT64_MAX;
4479 // If we used an estimate for the uv intra rd in the loop above...
4480 if (cpi->sf.use_uv_intra_rd_estimate) {
4481 // Do Intra UV best rd mode selection if best mode choice above was intra.
4482 if (vp9_ref_order[best_mode_index].ref_frame == INTRA_FRAME) {
4483 TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
4484 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
4485 &rate_uv_tokenonly[uv_tx_size],
4486 &dist_uv[uv_tx_size],
4487 &skip_uv[uv_tx_size],
4488 BLOCK_8X8);
4489 }
4490 }
4492 // If we are using reference masking and the set mask flag is set then
4493 // create the reference frame mask.
4494 if (cpi->sf.reference_masking && cpi->set_ref_frame_mask)
4495 cpi->ref_frame_mask = ~(1 << vp9_ref_order[best_mode_index].ref_frame);
4497 if (best_rd == INT64_MAX && bsize < BLOCK_8X8) {
4498 *returnrate = INT_MAX;
4499 *returndistortion = INT_MAX;
4500 return best_rd;
4501 }
4503 assert((cm->mcomp_filter_type == SWITCHABLE) ||
4504 (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
4505 (best_mbmode.ref_frame[0] == INTRA_FRAME));
4507 // Updating rd_thresh_freq_fact[] here means that the different
4508 // partition/block sizes are handled independently based on the best
4509 // choice for the current partition. It may well be better to keep a scaled
4510 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
4511 // combination that wins out.
4512 if (cpi->sf.adaptive_rd_thresh) {
4513 for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
4514 if (mode_index == best_mode_index) {
4515 cpi->rd_thresh_freq_sub8x8[bsize][mode_index] -=
4516 (cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 3);
4517 } else {
4518 cpi->rd_thresh_freq_sub8x8[bsize][mode_index] += RD_THRESH_INC;
4519 if (cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >
4520 (cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT)) {
4521 cpi->rd_thresh_freq_sub8x8[bsize][mode_index] =
4522 cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT;
4523 }
4524 }
4525 }
4526 }
4528 // macroblock modes
4529 *mbmi = best_mbmode;
4530 x->skip |= best_skip2;
4531 if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
4532 for (i = 0; i < 4; i++)
4533 xd->mi_8x8[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
4534 } else {
4535 for (i = 0; i < 4; ++i)
4536 vpx_memcpy(&xd->mi_8x8[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
4538 mbmi->mv[0].as_int = xd->mi_8x8[0]->bmi[3].as_mv[0].as_int;
4539 mbmi->mv[1].as_int = xd->mi_8x8[0]->bmi[3].as_mv[1].as_int;
4540 }
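// Note: bmi[3] is the last (bottom-right) sub-block, so its motion vectors
// are stored as the representative block-level mv (cf. the "required for
// left and above block mv" note in the intra path above).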
4542 for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
4543 if (best_pred_rd[i] == INT64_MAX)
4544 best_pred_diff[i] = INT_MIN;
4545 else
4546 best_pred_diff[i] = best_rd - best_pred_rd[i];
4547 }
4549 if (!x->skip) {
4550 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4551 if (best_filter_rd[i] == INT64_MAX)
4552 best_filter_diff[i] = 0;
4553 else
4554 best_filter_diff[i] = best_rd - best_filter_rd[i];
4555 }
4556 if (cm->mcomp_filter_type == SWITCHABLE)
4557 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
4558 } else {
4559 vp9_zero(best_filter_diff);
4560 }
4562 if (!x->skip) {
4563 for (i = 0; i < TX_MODES; i++) {
4564 if (best_tx_rd[i] == INT64_MAX)
4565 best_tx_diff[i] = 0;
4566 else
4567 best_tx_diff[i] = best_rd - best_tx_rd[i];
4568 }
4569 } else {
4570 vp9_zero(best_tx_diff);
4571 }
4573 set_scale_factors(xd, mbmi->ref_frame[0], mbmi->ref_frame[1],
4574 scale_factor);
4575 store_coding_context(x, ctx, best_mode_index,
4576 &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
4577 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
4578 mbmi->ref_frame[1]][0],
4579 best_pred_diff, best_tx_diff, best_filter_diff);
4581 return best_rd;
4582 }