media/libvpx/vp9/encoder/vp9_encodeframe.c

branch TOR_BUG_9701, changeset 15:b8a032363ba2
1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <limits.h>
12 #include <math.h>
13 #include <stdio.h>
14
15 #include "./vp9_rtcd.h"
16 #include "./vpx_config.h"
17
18 #include "vpx_ports/vpx_timer.h"
19
20 #include "vp9/common/vp9_common.h"
21 #include "vp9/common/vp9_entropy.h"
22 #include "vp9/common/vp9_entropymode.h"
23 #include "vp9/common/vp9_extend.h"
24 #include "vp9/common/vp9_findnearmv.h"
25 #include "vp9/common/vp9_idct.h"
26 #include "vp9/common/vp9_mvref_common.h"
27 #include "vp9/common/vp9_pred_common.h"
28 #include "vp9/common/vp9_quant_common.h"
29 #include "vp9/common/vp9_reconintra.h"
30 #include "vp9/common/vp9_reconinter.h"
31 #include "vp9/common/vp9_seg_common.h"
32 #include "vp9/common/vp9_tile_common.h"
33 #include "vp9/encoder/vp9_encodeframe.h"
34 #include "vp9/encoder/vp9_encodeintra.h"
35 #include "vp9/encoder/vp9_encodemb.h"
36 #include "vp9/encoder/vp9_encodemv.h"
37 #include "vp9/encoder/vp9_onyx_int.h"
38 #include "vp9/encoder/vp9_rdopt.h"
39 #include "vp9/encoder/vp9_segmentation.h"
40 #include "vp9/common/vp9_systemdependent.h"
41 #include "vp9/encoder/vp9_tokenize.h"
42 #include "vp9/encoder/vp9_vaq.h"
43
44
45 #define DBG_PRNT_SEGMAP 0
46
47
48 // #define ENC_DEBUG
49 #ifdef ENC_DEBUG
50 int enc_debug = 0;
51 #endif
52
53 static INLINE uint8_t *get_sb_index(MACROBLOCK *x, BLOCK_SIZE subsize) {
54 switch (subsize) {
55 case BLOCK_64X64:
56 case BLOCK_64X32:
57 case BLOCK_32X64:
58 case BLOCK_32X32:
59 return &x->sb_index;
60 case BLOCK_32X16:
61 case BLOCK_16X32:
62 case BLOCK_16X16:
63 return &x->mb_index;
64 case BLOCK_16X8:
65 case BLOCK_8X16:
66 case BLOCK_8X8:
67 return &x->b_index;
68 case BLOCK_8X4:
69 case BLOCK_4X8:
70 case BLOCK_4X4:
71 return &x->ab_index;
72 default:
73 assert(0);
74 return NULL;
75 }
76 }
77
78 static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
79 int mi_row, int mi_col, BLOCK_SIZE bsize);
80
81 static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x);
82
83 /* activity_avg must be positive, or flat regions could get a zero weight
84 * (infinite lambda), which confounds analysis.
85 * This also avoids the need for divide by zero checks in
86 * vp9_activity_masking().
87 */
88 #define ACTIVITY_AVG_MIN (64)
89
90 /* Motion vector component magnitude threshold for defining fast motion. */
91 #define FAST_MOTION_MV_THRESH (24)
92
93 /* This is used as a reference when computing the source variance for the
94 * purposes of activity masking.
95 * Eventually this should be replaced by custom no-reference routines,
96 * which will be faster.
97 */
98 static const uint8_t VP9_VAR_OFFS[64] = {
99 128, 128, 128, 128, 128, 128, 128, 128,
100 128, 128, 128, 128, 128, 128, 128, 128,
101 128, 128, 128, 128, 128, 128, 128, 128,
102 128, 128, 128, 128, 128, 128, 128, 128,
103 128, 128, 128, 128, 128, 128, 128, 128,
104 128, 128, 128, 128, 128, 128, 128, 128,
105 128, 128, 128, 128, 128, 128, 128, 128,
106 128, 128, 128, 128, 128, 128, 128, 128
107 };
108
109 static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi, MACROBLOCK *x,
110 BLOCK_SIZE bs) {
111 unsigned int var, sse;
112 var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
113 x->plane[0].src.stride,
114 VP9_VAR_OFFS, 0, &sse);
115 return (var + (1 << (num_pels_log2_lookup[bs] - 1))) >>
116 num_pels_log2_lookup[bs];
117 }
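/* Illustrative sketch (not encoder code): the return statement above
 * computes round(var / 2^k), k = num_pels_log2_lookup[bs], by adding half
 * the divisor before the shift. A hypothetical standalone version:
 *
 *   static unsigned int rounded_per_pixel(unsigned int var, int k) {
 *     return (var + (1u << (k - 1))) >> k;  // round-to-nearest division
 *   }
 *
 * e.g. with k = 12 (a 64x64 block has 2^12 pels),
 * rounded_per_pixel(10240, 12) == 3, since 10240 / 4096 = 2.5 rounds up.
 */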
118
119 // Original activity measure from Tim T's code.
120 static unsigned int tt_activity_measure(MACROBLOCK *x) {
121 unsigned int act;
122 unsigned int sse;
123 /* TODO: This could also be done over smaller areas (8x8), but that would
124 * require extensive changes elsewhere, as lambda is assumed to be fixed
125 * over an entire MB in most of the code.
126 * Another option is to compute four 8x8 variances, and pick a single
127 * lambda using a non-linear combination (e.g., the smallest, or second
128 * smallest, etc.).
129 */
130 act = vp9_variance16x16(x->plane[0].src.buf, x->plane[0].src.stride,
131 VP9_VAR_OFFS, 0, &sse);
132 act <<= 4;
133
134 /* If the region is flat, lower the activity some more. */
135 if (act < 8 << 12)
136 act = act < 5 << 12 ? act : 5 << 12;
137
138 return act;
139 }
140
141 // Stub for alternative experimental activity measures.
142 static unsigned int alt_activity_measure(MACROBLOCK *x, int use_dc_pred) {
143 return vp9_encode_intra(x, use_dc_pred);
144 }
145
146 // Measure the activity of the current macroblock.
147 // What we measure here is TBD, so it is abstracted into this function.
148 #define ALT_ACT_MEASURE 1
149 static unsigned int mb_activity_measure(MACROBLOCK *x, int mb_row, int mb_col) {
150 unsigned int mb_activity;
151
152 if (ALT_ACT_MEASURE) {
153 int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
154
155 // Or use an alternative.
156 mb_activity = alt_activity_measure(x, use_dc_pred);
157 } else {
158 // Original activity measure from Tim T's code.
159 mb_activity = tt_activity_measure(x);
160 }
161
162 if (mb_activity < ACTIVITY_AVG_MIN)
163 mb_activity = ACTIVITY_AVG_MIN;
164
165 return mb_activity;
166 }
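/* A note on the use_dc_pred predicate above: (mb_col || mb_row) &&
 * (!mb_col || !mb_row) is true exactly when one coordinate is zero and
 * the other is not, i.e. on the top row or left column of the frame,
 * excluding the top-left MB:
 *
 *   mb_row  mb_col  use_dc_pred
 *     0       0         0       (top-left corner)
 *     0      >0         1       (top row)
 *    >0       0         1       (left column)
 *    >0      >0         0       (interior)
 */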
167
168 // Calculate an "average" mb activity value for the frame
169 #define ACT_MEDIAN 0
170 static void calc_av_activity(VP9_COMP *cpi, int64_t activity_sum) {
171 #if ACT_MEDIAN
172 // Find median: Simple n^2 algorithm for experimentation
173 {
174 unsigned int median;
175 unsigned int i, j;
176 unsigned int *sortlist;
177 unsigned int tmp;
178
179 // Create a list to sort to
180 CHECK_MEM_ERROR(&cpi->common, sortlist, vpx_calloc(sizeof(unsigned int),
181 cpi->common.MBs));
182
183 // Copy map to sort list
184 vpx_memcpy(sortlist, cpi->mb_activity_map,
185 sizeof(unsigned int) * cpi->common.MBs);
186
187 // Ripple each value down to its correct position
188 for (i = 1; i < cpi->common.MBs; i ++) {
189 for (j = i; j > 0; j --) {
190 if (sortlist[j] < sortlist[j - 1]) {
191 // Swap values
192 tmp = sortlist[j - 1];
193 sortlist[j - 1] = sortlist[j];
194 sortlist[j] = tmp;
195 } else {
196 break;
197 }
198 }
199 }
200
201 // Even number of MBs, so estimate the median as the mean of the two values either side of the midpoint.
202 median = (1 + sortlist[cpi->common.MBs >> 1] +
203 sortlist[(cpi->common.MBs >> 1) + 1]) >> 1;
204
205 cpi->activity_avg = median;
206
207 vpx_free(sortlist);
208 }
209 #else
210 // Simple mean for now
211 cpi->activity_avg = (unsigned int) (activity_sum / cpi->common.MBs);
212 #endif // ACT_MEDIAN
213
214 if (cpi->activity_avg < ACTIVITY_AVG_MIN)
215 cpi->activity_avg = ACTIVITY_AVG_MIN;
216
217 // Experimental code: return fixed value normalized for several clips
218 if (ALT_ACT_MEASURE)
219 cpi->activity_avg = 100000;
220 }
221
222 #define USE_ACT_INDEX 0
223 #define OUTPUT_NORM_ACT_STATS 0
224
225 #if USE_ACT_INDEX
226 // Calculate an activity index for each mb
227 static void calc_activity_index(VP9_COMP *cpi, MACROBLOCK *x) {
228 VP9_COMMON *const cm = &cpi->common;
229 int mb_row, mb_col;
230
231 int64_t act;
232 int64_t a;
233 int64_t b;
234
235 #if OUTPUT_NORM_ACT_STATS
236 FILE *f = fopen("norm_act.stt", "a");
237 fprintf(f, "\n%12d\n", cpi->activity_avg);
238 #endif
239
240 // Reset pointers to start of activity map
241 x->mb_activity_ptr = cpi->mb_activity_map;
242
243 // Calculate normalized mb activity number.
244 for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
245 // for each macroblock col in image
246 for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
247 // Read activity from the map
248 act = *(x->mb_activity_ptr);
249
250 // Calculate a normalized activity number
251 a = act + 4 * cpi->activity_avg;
252 b = 4 * act + cpi->activity_avg;
253
254 if (b >= a)
255 *(x->activity_ptr) = (int)((b + (a >> 1)) / a) - 1;
256 else
257 *(x->activity_ptr) = 1 - (int)((a + (b >> 1)) / b);
258
259 #if OUTPUT_NORM_ACT_STATS
260 fprintf(f, " %6d", *(x->mb_activity_ptr));
261 #endif
262 // Increment activity map pointers
263 x->mb_activity_ptr++;
264 }
265
266 #if OUTPUT_NORM_ACT_STATS
267 fprintf(f, "\n");
268 #endif
269 }
270
271 #if OUTPUT_NORM_ACT_STATS
272 fclose(f);
273 #endif
274 }
275 #endif // USE_ACT_INDEX
276
277 // Loop through all MBs. Note the activity of each, compute the average
278 // activity, and calculate a normalized activity for each.
279 static void build_activity_map(VP9_COMP *cpi) {
280 MACROBLOCK * const x = &cpi->mb;
281 MACROBLOCKD *xd = &x->e_mbd;
282 VP9_COMMON * const cm = &cpi->common;
283
284 #if ALT_ACT_MEASURE
285 YV12_BUFFER_CONFIG *new_yv12 = get_frame_new_buffer(cm);
286 int recon_yoffset;
287 int recon_y_stride = new_yv12->y_stride;
288 #endif
289
290 int mb_row, mb_col;
291 unsigned int mb_activity;
292 int64_t activity_sum = 0;
293
294 x->mb_activity_ptr = cpi->mb_activity_map;
295
296 // for each macroblock row in image
297 for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
298 #if ALT_ACT_MEASURE
299 // reset above block coeffs
300 xd->up_available = (mb_row != 0);
301 recon_yoffset = (mb_row * recon_y_stride * 16);
302 #endif
303 // for each macroblock col in image
304 for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
305 #if ALT_ACT_MEASURE
306 xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset;
307 xd->left_available = (mb_col != 0);
308 recon_yoffset += 16;
309 #endif
310
311 // measure activity
312 mb_activity = mb_activity_measure(x, mb_row, mb_col);
313
314 // Keep frame sum
315 activity_sum += mb_activity;
316
317 // Store MB level activity details.
318 *x->mb_activity_ptr = mb_activity;
319
320 // Increment activity map pointer
321 x->mb_activity_ptr++;
322
323 // adjust to the next column of source macroblocks
324 x->plane[0].src.buf += 16;
325 }
326
327 // adjust to the next row of mbs
328 x->plane[0].src.buf += 16 * x->plane[0].src.stride - 16 * cm->mb_cols;
329 }
330
331 // Calculate an "average" MB activity
332 calc_av_activity(cpi, activity_sum);
333
334 #if USE_ACT_INDEX
335 // Calculate an activity index number of each mb
336 calc_activity_index(cpi, x);
337 #endif
338 }
339
340 // Macroblock activity masking
341 void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x) {
342 #if USE_ACT_INDEX
343 x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2);
344 x->errorperbit = x->rdmult * 100 / (110 * x->rddiv);
345 x->errorperbit += (x->errorperbit == 0);
346 #else
347 int64_t a;
348 int64_t b;
349 int64_t act = *(x->mb_activity_ptr);
350
351 // Apply the masking to the RD multiplier.
352 a = act + (2 * cpi->activity_avg);
353 b = (2 * act) + cpi->activity_avg;
354
355 x->rdmult = (unsigned int) (((int64_t) x->rdmult * b + (a >> 1)) / a);
356 x->errorperbit = x->rdmult * 100 / (110 * x->rddiv);
357 x->errorperbit += (x->errorperbit == 0);
358 #endif
359
360 // Activity based Zbin adjustment
361 adjust_act_zbin(cpi, x);
362 }
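/* Worked example of the masking ratio above (pure algebra, not encoder
 * code): with a = act + 2 * avg and b = 2 * act + avg, the rdmult scale
 * factor b / a is bounded in (1/2, 2):
 *   act == avg         ->  b / a == 1    (no change)
 *   act >> avg (busy)  ->  b / a -> 2    (rdmult up, fewer bits spent)
 *   act << avg (flat)  ->  b / a -> 1/2  (rdmult down, more bits spent)
 * The (a >> 1) term rounds the integer division to nearest.
 */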
363
364 static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
365 BLOCK_SIZE bsize, int output_enabled) {
366 int i, x_idx, y;
367 VP9_COMMON *const cm = &cpi->common;
368 MACROBLOCK *const x = &cpi->mb;
369 MACROBLOCKD *const xd = &x->e_mbd;
370 struct macroblock_plane *const p = x->plane;
371 struct macroblockd_plane *const pd = xd->plane;
372 MODE_INFO *mi = &ctx->mic;
373 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
374 MODE_INFO *mi_addr = xd->mi_8x8[0];
375
376 int mb_mode_index = ctx->best_mode_index;
377 const int mis = cm->mode_info_stride;
378 const int mi_width = num_8x8_blocks_wide_lookup[bsize];
379 const int mi_height = num_8x8_blocks_high_lookup[bsize];
380 int max_plane;
381
382 assert(mi->mbmi.mode < MB_MODE_COUNT);
383 assert(mi->mbmi.ref_frame[0] < MAX_REF_FRAMES);
384 assert(mi->mbmi.ref_frame[1] < MAX_REF_FRAMES);
385 assert(mi->mbmi.sb_type == bsize);
386
387 *mi_addr = *mi;
388
389 max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1;
390 for (i = 0; i < max_plane; ++i) {
391 p[i].coeff = ctx->coeff_pbuf[i][1];
392 pd[i].qcoeff = ctx->qcoeff_pbuf[i][1];
393 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
394 pd[i].eobs = ctx->eobs_pbuf[i][1];
395 }
396
397 for (i = max_plane; i < MAX_MB_PLANE; ++i) {
398 p[i].coeff = ctx->coeff_pbuf[i][2];
399 pd[i].qcoeff = ctx->qcoeff_pbuf[i][2];
400 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
401 pd[i].eobs = ctx->eobs_pbuf[i][2];
402 }
403
404 // Restore the coding context of the MB to what was in place
405 // when the mode was picked for it.
406 for (y = 0; y < mi_height; y++)
407 for (x_idx = 0; x_idx < mi_width; x_idx++)
408 if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx
409 && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y)
410 xd->mi_8x8[x_idx + y * mis] = mi_addr;
411
412 if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
413 vp9_mb_init_quantizer(cpi, x);
414 }
415
416 // FIXME(rbultje) I'm pretty sure this should go to the end of this block
417 // (i.e. after the output_enabled)
418 if (bsize < BLOCK_32X32) {
419 if (bsize < BLOCK_16X16)
420 ctx->tx_rd_diff[ALLOW_16X16] = ctx->tx_rd_diff[ALLOW_8X8];
421 ctx->tx_rd_diff[ALLOW_32X32] = ctx->tx_rd_diff[ALLOW_16X16];
422 }
423
424 if (is_inter_block(mbmi) && mbmi->sb_type < BLOCK_8X8) {
425 mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
426 mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
427 }
428
429 x->skip = ctx->skip;
430 vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,
431 sizeof(uint8_t) * ctx->num_4x4_blk);
432
433 if (!output_enabled)
434 return;
435
436 if (!vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
437 for (i = 0; i < TX_MODES; i++)
438 cpi->rd_tx_select_diff[i] += ctx->tx_rd_diff[i];
439 }
440
441 if (frame_is_intra_only(cm)) {
442 #if CONFIG_INTERNAL_STATS
443 static const int kf_mode_index[] = {
444 THR_DC /*DC_PRED*/,
445 THR_V_PRED /*V_PRED*/,
446 THR_H_PRED /*H_PRED*/,
447 THR_D45_PRED /*D45_PRED*/,
448 THR_D135_PRED /*D135_PRED*/,
449 THR_D117_PRED /*D117_PRED*/,
450 THR_D153_PRED /*D153_PRED*/,
451 THR_D207_PRED /*D207_PRED*/,
452 THR_D63_PRED /*D63_PRED*/,
453 THR_TM /*TM_PRED*/,
454 };
455 cpi->mode_chosen_counts[kf_mode_index[mi->mbmi.mode]]++;
456 #endif
457 } else {
458 // Note how often each mode chosen as best
459 cpi->mode_chosen_counts[mb_mode_index]++;
460 if (is_inter_block(mbmi)
461 && (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) {
462 int_mv best_mv[2];
463 const MV_REFERENCE_FRAME rf1 = mbmi->ref_frame[0];
464 const MV_REFERENCE_FRAME rf2 = mbmi->ref_frame[1];
465 best_mv[0].as_int = ctx->best_ref_mv.as_int;
466 best_mv[1].as_int = ctx->second_best_ref_mv.as_int;
467 if (mbmi->mode == NEWMV) {
468 best_mv[0].as_int = mbmi->ref_mvs[rf1][0].as_int;
469 if (rf2 > 0)
470 best_mv[1].as_int = mbmi->ref_mvs[rf2][0].as_int;
471 }
472 mbmi->best_mv[0].as_int = best_mv[0].as_int;
473 mbmi->best_mv[1].as_int = best_mv[1].as_int;
474 vp9_update_mv_count(cpi, x, best_mv);
475 }
476
477 if (cm->mcomp_filter_type == SWITCHABLE && is_inter_mode(mbmi->mode)) {
478 const int ctx = vp9_get_pred_context_switchable_interp(xd);
479 ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
480 }
481
482 cpi->rd_comp_pred_diff[SINGLE_PREDICTION_ONLY] += ctx->single_pred_diff;
483 cpi->rd_comp_pred_diff[COMP_PREDICTION_ONLY] += ctx->comp_pred_diff;
484 cpi->rd_comp_pred_diff[HYBRID_PREDICTION] += ctx->hybrid_pred_diff;
485
486 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
487 cpi->rd_filter_diff[i] += ctx->best_filter_diff[i];
488 }
489 }
490
491 void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
492 int mi_row, int mi_col) {
493 uint8_t *const buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer,
494 src->alpha_buffer};
495 const int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride,
496 src->alpha_stride};
497 int i;
498
499 for (i = 0; i < MAX_MB_PLANE; i++)
500 setup_pred_plane(&x->plane[i].src, buffers[i], strides[i], mi_row, mi_col,
501 NULL, x->e_mbd.plane[i].subsampling_x,
502 x->e_mbd.plane[i].subsampling_y);
503 }
504
505 static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
506 int mi_row, int mi_col, BLOCK_SIZE bsize) {
507 MACROBLOCK *const x = &cpi->mb;
508 VP9_COMMON *const cm = &cpi->common;
509 MACROBLOCKD *const xd = &x->e_mbd;
510 MB_MODE_INFO *mbmi;
511 const int dst_fb_idx = cm->new_fb_idx;
512 const int idx_str = xd->mode_info_stride * mi_row + mi_col;
513 const int mi_width = num_8x8_blocks_wide_lookup[bsize];
514 const int mi_height = num_8x8_blocks_high_lookup[bsize];
515 const int mb_row = mi_row >> 1;
516 const int mb_col = mi_col >> 1;
517 const int idx_map = mb_row * cm->mb_cols + mb_col;
518 const struct segmentation *const seg = &cm->seg;
519
520 set_skip_context(xd, cpi->above_context, cpi->left_context, mi_row, mi_col);
521
522 // Activity map pointer
523 x->mb_activity_ptr = &cpi->mb_activity_map[idx_map];
524 x->active_ptr = cpi->active_map + idx_map;
525
526 xd->mi_8x8 = cm->mi_grid_visible + idx_str;
527 xd->prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
528
529 // Special case: if prev_mi is NULL, the previous mode info context
530 // cannot be used.
531 xd->last_mi = cm->prev_mi ? xd->prev_mi_8x8[0] : NULL;
532
533 xd->mi_8x8[0] = cm->mi + idx_str;
534
535 mbmi = &xd->mi_8x8[0]->mbmi;
536
537 // Set up destination pointers
538 setup_dst_planes(xd, &cm->yv12_fb[dst_fb_idx], mi_row, mi_col);
539
540 // Set up limit values for MV components.
541 // MVs beyond this range do not produce a new/different prediction block.
542 x->mv_row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND);
543 x->mv_col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND);
544 x->mv_row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND;
545 x->mv_col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND;
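/* Worked example (values assumed for illustration: MI_SIZE = 8 pels,
 * VP9_INTERP_EXTEND = 4): for a 64x64 block (mi_height = 8) at mi_row = 0,
 *   mv_row_min = -((0 + 8) * 8 + 4) = -68
 * so a candidate MV may push the predictor up to the interpolation
 * border just above the frame but no further; mv_row_max grows with the
 * number of MI rows remaining below the block.
 */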
546
547 // Set up distance of MB to edge of frame in 1/8th pel units
548 assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
549 set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width,
550 cm->mi_rows, cm->mi_cols);
551
552 /* set up source buffers */
553 vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
554
555 /* R/D setup */
556 x->rddiv = cpi->RDDIV;
557 x->rdmult = cpi->RDMULT;
558
559 /* segment ID */
560 if (seg->enabled) {
561 if (cpi->oxcf.aq_mode != VARIANCE_AQ) {
562 uint8_t *map = seg->update_map ? cpi->segmentation_map
563 : cm->last_frame_seg_map;
564 mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col);
565 }
566 vp9_mb_init_quantizer(cpi, x);
567
568 if (seg->enabled && cpi->seg0_cnt > 0
569 && !vp9_segfeature_active(seg, 0, SEG_LVL_REF_FRAME)
570 && vp9_segfeature_active(seg, 1, SEG_LVL_REF_FRAME)) {
571 cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt;
572 } else {
573 const int y = mb_row & ~3;
574 const int x = mb_col & ~3;
575 const int p16 = ((mb_row & 1) << 1) + (mb_col & 1);
576 const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1);
577 const int tile_progress = tile->mi_col_start * cm->mb_rows >> 1;
578 const int mb_cols = (tile->mi_col_end - tile->mi_col_start) >> 1;
579
580 cpi->seg0_progress = ((y * mb_cols + x * 4 + p32 + p16 + tile_progress)
581 << 16) / cm->MBs;
582 }
583
584 x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id];
585 } else {
586 mbmi->segment_id = 0;
587 x->encode_breakout = cpi->oxcf.encode_breakout;
588 }
589 }
590
591 static void pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
592 int mi_row, int mi_col,
593 int *totalrate, int64_t *totaldist,
594 BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
595 int64_t best_rd) {
596 VP9_COMMON *const cm = &cpi->common;
597 MACROBLOCK *const x = &cpi->mb;
598 MACROBLOCKD *const xd = &x->e_mbd;
599 struct macroblock_plane *const p = x->plane;
600 struct macroblockd_plane *const pd = xd->plane;
601 int i;
602 int orig_rdmult = x->rdmult;
603 double rdmult_ratio;
604
605 vp9_clear_system_state(); // __asm emms;
606 rdmult_ratio = 1.0; // avoid uninitialized warnings
607
608 // Use the lower precision, but faster, 32x32 fdct for mode selection.
609 x->use_lp32x32fdct = 1;
610
611 if (bsize < BLOCK_8X8) {
612 // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
613 // there is nothing to be done.
614 if (x->ab_index != 0) {
615 *totalrate = 0;
616 *totaldist = 0;
617 return;
618 }
619 }
620
621 set_offsets(cpi, tile, mi_row, mi_col, bsize);
622 xd->mi_8x8[0]->mbmi.sb_type = bsize;
623
624 for (i = 0; i < MAX_MB_PLANE; ++i) {
625 p[i].coeff = ctx->coeff_pbuf[i][0];
626 pd[i].qcoeff = ctx->qcoeff_pbuf[i][0];
627 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0];
628 pd[i].eobs = ctx->eobs_pbuf[i][0];
629 }
630 ctx->is_coded = 0;
631 x->skip_recode = 0;
632
633 // Set to zero to make sure we do not use the previous encoded frame stats
634 xd->mi_8x8[0]->mbmi.skip_coeff = 0;
635
636 x->source_variance = get_sby_perpixel_variance(cpi, x, bsize);
637
638 if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
639 int energy;
640 if (bsize <= BLOCK_16X16) {
641 energy = x->mb_energy;
642 } else {
643 energy = vp9_block_energy(cpi, x, bsize);
644 }
645
646 xd->mi_8x8[0]->mbmi.segment_id = vp9_vaq_segment_id(energy);
647 rdmult_ratio = vp9_vaq_rdmult_ratio(energy);
648 vp9_mb_init_quantizer(cpi, x);
649 }
650
651 if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
652 vp9_activity_masking(cpi, x);
653
654 if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
655 vp9_clear_system_state(); // __asm emms;
656 x->rdmult = round(x->rdmult * rdmult_ratio);
657 }
658
659 // Find best coding mode & reconstruct the MB so it is available
660 // as a predictor for MBs that follow in the SB
661 if (frame_is_intra_only(cm)) {
662 vp9_rd_pick_intra_mode_sb(cpi, x, totalrate, totaldist, bsize, ctx,
663 best_rd);
664 } else {
665 if (bsize >= BLOCK_8X8)
666 vp9_rd_pick_inter_mode_sb(cpi, x, tile, mi_row, mi_col,
667 totalrate, totaldist, bsize, ctx, best_rd);
668 else
669 vp9_rd_pick_inter_mode_sub8x8(cpi, x, tile, mi_row, mi_col, totalrate,
670 totaldist, bsize, ctx, best_rd);
671 }
672
673 if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
674 x->rdmult = orig_rdmult;
675 if (*totalrate != INT_MAX) {
676 vp9_clear_system_state(); // __asm emms;
677 *totalrate = round(*totalrate * rdmult_ratio);
678 }
679 }
680 }
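/* Why the rate is rescaled above (informal reasoning, treating the RD
 * cost as lambda * rate + dist with lambda proportional to rdmult): the
 * mode search ran with lambda' = lambda * rdmult_ratio, so the cost it
 * minimized was lambda * (rdmult_ratio * rate) + dist. Returning
 * rate * rdmult_ratio lets callers that combine this block's stats under
 * the original, unscaled lambda reproduce the same effective cost.
 */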
681
682 static void update_stats(VP9_COMP *cpi) {
683 VP9_COMMON *const cm = &cpi->common;
684 MACROBLOCK *const x = &cpi->mb;
685 MACROBLOCKD *const xd = &x->e_mbd;
686 MODE_INFO *mi = xd->mi_8x8[0];
687 MB_MODE_INFO *const mbmi = &mi->mbmi;
688
689 if (!frame_is_intra_only(cm)) {
690 const int seg_ref_active = vp9_segfeature_active(&cm->seg, mbmi->segment_id,
691 SEG_LVL_REF_FRAME);
692
693 if (!seg_ref_active)
694 cpi->intra_inter_count[vp9_get_pred_context_intra_inter(xd)]
695 [is_inter_block(mbmi)]++;
696
697 // If the segment reference feature is enabled we have only a single
698 // reference frame allowed for the segment so exclude it from
699 // the reference frame counts used to work out probabilities.
700 if (is_inter_block(mbmi) && !seg_ref_active) {
701 if (cm->comp_pred_mode == HYBRID_PREDICTION)
702 cpi->comp_inter_count[vp9_get_pred_context_comp_inter_inter(cm, xd)]
703 [has_second_ref(mbmi)]++;
704
705 if (has_second_ref(mbmi)) {
706 cpi->comp_ref_count[vp9_get_pred_context_comp_ref_p(cm, xd)]
707 [mbmi->ref_frame[0] == GOLDEN_FRAME]++;
708 } else {
709 cpi->single_ref_count[vp9_get_pred_context_single_ref_p1(xd)][0]
710 [mbmi->ref_frame[0] != LAST_FRAME]++;
711 if (mbmi->ref_frame[0] != LAST_FRAME)
712 cpi->single_ref_count[vp9_get_pred_context_single_ref_p2(xd)][1]
713 [mbmi->ref_frame[0] != GOLDEN_FRAME]++;
714 }
715 }
716 }
717 }
718
719 static BLOCK_SIZE *get_sb_partitioning(MACROBLOCK *x, BLOCK_SIZE bsize) {
720 switch (bsize) {
721 case BLOCK_64X64:
722 return &x->sb64_partitioning;
723 case BLOCK_32X32:
724 return &x->sb_partitioning[x->sb_index];
725 case BLOCK_16X16:
726 return &x->mb_partitioning[x->sb_index][x->mb_index];
727 case BLOCK_8X8:
728 return &x->b_partitioning[x->sb_index][x->mb_index][x->b_index];
729 default:
730 assert(0);
731 return NULL;
732 }
733 }
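/* The per-level indices used above trace a path down the recursive
 * partition tree; a sketch of how they nest (illustrative, assuming the
 * usual 64 -> 32 -> 16 -> 8 recursion):
 *
 *   sb_index : which 32x32 quadrant of the 64x64 SB        (0..3)
 *   mb_index : which 16x16 quadrant of that 32x32 block    (0..3)
 *   b_index  : which 8x8 quadrant of that 16x16 block      (0..3)
 *   ab_index : which sub-8x8 block of that 8x8 block       (0..3)
 *
 * get_sb_index() selects the field for the level a subsize lives at;
 * get_sb_partitioning() reads the stored choice using the indices of all
 * enclosing levels.
 */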
734
735 static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col,
736 ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
737 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
738 PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
739 BLOCK_SIZE bsize) {
740 MACROBLOCK *const x = &cpi->mb;
741 MACROBLOCKD *const xd = &x->e_mbd;
742 int p;
743 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
744 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
745 int mi_width = num_8x8_blocks_wide_lookup[bsize];
746 int mi_height = num_8x8_blocks_high_lookup[bsize];
747 for (p = 0; p < MAX_MB_PLANE; p++) {
748 vpx_memcpy(
749 cpi->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
750 a + num_4x4_blocks_wide * p,
751 (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
752 xd->plane[p].subsampling_x);
753 vpx_memcpy(
754 cpi->left_context[p]
755 + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
756 l + num_4x4_blocks_high * p,
757 (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
758 xd->plane[p].subsampling_y);
759 }
760 vpx_memcpy(cpi->above_seg_context + mi_col, sa,
761 sizeof(*cpi->above_seg_context) * mi_width);
762 vpx_memcpy(cpi->left_seg_context + (mi_row & MI_MASK), sl,
763 sizeof(cpi->left_seg_context[0]) * mi_height);
764 }
765 static void save_context(VP9_COMP *cpi, int mi_row, int mi_col,
766 ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
767 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
768 PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
769 BLOCK_SIZE bsize) {
770 const MACROBLOCK *const x = &cpi->mb;
771 const MACROBLOCKD *const xd = &x->e_mbd;
772 int p;
773 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
774 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
775 int mi_width = num_8x8_blocks_wide_lookup[bsize];
776 int mi_height = num_8x8_blocks_high_lookup[bsize];
777
778 // Buffer the above/left context information of the block being searched.
779 for (p = 0; p < MAX_MB_PLANE; ++p) {
780 vpx_memcpy(
781 a + num_4x4_blocks_wide * p,
782 cpi->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
783 (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
784 xd->plane[p].subsampling_x);
785 vpx_memcpy(
786 l + num_4x4_blocks_high * p,
787 cpi->left_context[p]
788 + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
789 (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
790 xd->plane[p].subsampling_y);
791 }
792 vpx_memcpy(sa, cpi->above_seg_context + mi_col,
793 sizeof(*cpi->above_seg_context) * mi_width);
794 vpx_memcpy(sl, cpi->left_seg_context + (mi_row & MI_MASK),
795 sizeof(cpi->left_seg_context[0]) * mi_height);
796 }
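/* Typical usage of the pair above, as seen at the call sites in the
 * partition search below (a sketch, not new encoder logic):
 *
 *   save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
 *   pick_sb_modes(...);   // trial encode mutates the above/left contexts
 *   restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
 *
 * Saving and restoring lets several candidate partitionings be evaluated
 * from the same starting entropy and partition context.
 */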
797
798 static void encode_b(VP9_COMP *cpi, const TileInfo *const tile,
799 TOKENEXTRA **tp, int mi_row, int mi_col,
800 int output_enabled, BLOCK_SIZE bsize, int sub_index) {
801 VP9_COMMON *const cm = &cpi->common;
802 MACROBLOCK *const x = &cpi->mb;
803
804 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
805 return;
806
807 if (sub_index != -1)
808 *get_sb_index(x, bsize) = sub_index;
809
810 if (bsize < BLOCK_8X8) {
811 // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
812 // there is nothing to be done.
813 if (x->ab_index > 0)
814 return;
815 }
816 set_offsets(cpi, tile, mi_row, mi_col, bsize);
817 update_state(cpi, get_block_context(x, bsize), bsize, output_enabled);
818 encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize);
819
820 if (output_enabled) {
821 update_stats(cpi);
822
823 (*tp)->token = EOSB_TOKEN;
824 (*tp)++;
825 }
826 }
827
828 static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile,
829 TOKENEXTRA **tp, int mi_row, int mi_col,
830 int output_enabled, BLOCK_SIZE bsize) {
831 VP9_COMMON *const cm = &cpi->common;
832 MACROBLOCK *const x = &cpi->mb;
833 BLOCK_SIZE c1 = BLOCK_8X8;
834 const int bsl = b_width_log2(bsize), bs = (1 << bsl) / 4;
835 int pl = 0;
836 PARTITION_TYPE partition;
837 BLOCK_SIZE subsize;
838 int i;
839
840 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
841 return;
842
843 c1 = BLOCK_4X4;
844 if (bsize >= BLOCK_8X8) {
845 pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
846 mi_row, mi_col, bsize);
847 c1 = *(get_sb_partitioning(x, bsize));
848 }
849 partition = partition_lookup[bsl][c1];
850
851 switch (partition) {
852 case PARTITION_NONE:
853 if (output_enabled && bsize >= BLOCK_8X8)
854 cpi->partition_count[pl][PARTITION_NONE]++;
855 encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, c1, -1);
856 break;
857 case PARTITION_VERT:
858 if (output_enabled)
859 cpi->partition_count[pl][PARTITION_VERT]++;
860 encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, c1, 0);
861 encode_b(cpi, tile, tp, mi_row, mi_col + bs, output_enabled, c1, 1);
862 break;
863 case PARTITION_HORZ:
864 if (output_enabled)
865 cpi->partition_count[pl][PARTITION_HORZ]++;
866 encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, c1, 0);
867 encode_b(cpi, tile, tp, mi_row + bs, mi_col, output_enabled, c1, 1);
868 break;
869 case PARTITION_SPLIT:
870 subsize = get_subsize(bsize, PARTITION_SPLIT);
871
872 if (output_enabled)
873 cpi->partition_count[pl][PARTITION_SPLIT]++;
874
875 for (i = 0; i < 4; i++) {
876 const int x_idx = i & 1, y_idx = i >> 1;
877
878 *get_sb_index(x, subsize) = i;
879 encode_sb(cpi, tile, tp, mi_row + y_idx * bs, mi_col + x_idx * bs,
880 output_enabled, subsize);
881 }
882 break;
883 default:
884 assert(0);
885 break;
886 }
887
888 if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
889 update_partition_context(cpi->above_seg_context, cpi->left_seg_context,
890 mi_row, mi_col, c1, bsize);
891 }
892
893 // Check to see if the given partition size is allowed for a specified number
894 // of 8x8 block rows and columns remaining in the image.
895 // If not, return the largest allowed partition size.
896 static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize,
897 int rows_left, int cols_left,
898 int *bh, int *bw) {
899 if ((rows_left <= 0) || (cols_left <= 0)) {
900 return MIN(bsize, BLOCK_8X8);
901 } else {
902 for (; bsize > 0; --bsize) {
903 *bh = num_8x8_blocks_high_lookup[bsize];
904 *bw = num_8x8_blocks_wide_lookup[bsize];
905 if ((*bh <= rows_left) && (*bw <= cols_left)) {
906 break;
907 }
908 }
909 }
910 return bsize;
911 }
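/* Worked example (assuming the standard BLOCK_SIZE enum ordering): with
 * bsize = BLOCK_64X64, rows_left = 2 and cols_left = 4, the loop steps
 * down through the enum until both dimensions fit. BLOCK_32X16 is the
 * first size with *bh = 2 <= rows_left and *bw = 4 <= cols_left, so it
 * is returned.
 */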
912
913 // This function attempts to set all mode info entries in a given SB64
914 // to the same block partition size.
915 // However, at the bottom and right borders of the image the requested size
916 // may not be allowed in which case this code attempts to choose the largest
917 // allowable partition.
918 static void set_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
919 MODE_INFO **mi_8x8, int mi_row, int mi_col) {
920 VP9_COMMON *const cm = &cpi->common;
921 BLOCK_SIZE bsize = cpi->sf.always_this_block_size;
922 const int mis = cm->mode_info_stride;
923 int row8x8_remaining = tile->mi_row_end - mi_row;
924 int col8x8_remaining = tile->mi_col_end - mi_col;
925 int block_row, block_col;
926 MODE_INFO * mi_upper_left = cm->mi + mi_row * mis + mi_col;
927 int bh = num_8x8_blocks_high_lookup[bsize];
928 int bw = num_8x8_blocks_wide_lookup[bsize];
929
930 assert((row8x8_remaining > 0) && (col8x8_remaining > 0));
931
932 // Apply the requested partition size to the SB64 if it is all "in image"
933 if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
934 (row8x8_remaining >= MI_BLOCK_SIZE)) {
935 for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) {
936 for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
937 int index = block_row * mis + block_col;
938 mi_8x8[index] = mi_upper_left + index;
939 mi_8x8[index]->mbmi.sb_type = bsize;
940 }
941 }
942 } else {
943 // Else this is a partial SB64.
944 for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) {
945 for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
946 int index = block_row * mis + block_col;
947 // Find a partition size that fits
948 bsize = find_partition_size(cpi->sf.always_this_block_size,
949 (row8x8_remaining - block_row),
950 (col8x8_remaining - block_col), &bh, &bw);
951 mi_8x8[index] = mi_upper_left + index;
952 mi_8x8[index]->mbmi.sb_type = bsize;
953 }
954 }
955 }
956 }
957
958 static void copy_partitioning(VP9_COMP *cpi, MODE_INFO **mi_8x8,
959 MODE_INFO **prev_mi_8x8) {
960 VP9_COMMON *const cm = &cpi->common;
961 const int mis = cm->mode_info_stride;
962 int block_row, block_col;
963
964 for (block_row = 0; block_row < 8; ++block_row) {
965 for (block_col = 0; block_col < 8; ++block_col) {
966 MODE_INFO * prev_mi = prev_mi_8x8[block_row * mis + block_col];
967 BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0;
968 ptrdiff_t offset;
969
970 if (prev_mi) {
971 offset = prev_mi - cm->prev_mi;
972 mi_8x8[block_row * mis + block_col] = cm->mi + offset;
973 mi_8x8[block_row * mis + block_col]->mbmi.sb_type = sb_type;
974 }
975 }
976 }
977 }
978
979 static int sb_has_motion(VP9_COMP *cpi, MODE_INFO **prev_mi_8x8) {
980 VP9_COMMON *const cm = &cpi->common;
981 const int mis = cm->mode_info_stride;
982 int block_row, block_col;
983
984 if (cm->prev_mi) {
985 for (block_row = 0; block_row < 8; ++block_row) {
986 for (block_col = 0; block_col < 8; ++block_col) {
987 MODE_INFO * prev_mi = prev_mi_8x8[block_row * mis + block_col];
988 if (prev_mi) {
989 if (abs(prev_mi->mbmi.mv[0].as_mv.row) >= 8 ||
990 abs(prev_mi->mbmi.mv[0].as_mv.col) >= 8)
991 return 1;
992 }
993 }
994 }
995 }
996 return 0;
997 }
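/* Note: MV components are stored in 1/8-pel units, so the >= 8 tests
 * above flag any block whose previous-frame motion reached at least one
 * full pel in either direction.
 */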
998
999 static void rd_use_partition(VP9_COMP *cpi,
1000 const TileInfo *const tile,
1001 MODE_INFO **mi_8x8,
1002 TOKENEXTRA **tp, int mi_row, int mi_col,
1003 BLOCK_SIZE bsize, int *rate, int64_t *dist,
1004 int do_recon) {
1005 VP9_COMMON *const cm = &cpi->common;
1006 MACROBLOCK *const x = &cpi->mb;
1007 const int mis = cm->mode_info_stride;
1008 int bsl = b_width_log2(bsize);
1009 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1010 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1011 int ms = num_4x4_blocks_wide / 2;
1012 int mh = num_4x4_blocks_high / 2;
1013 int bss = (1 << bsl) / 4;
1014 int i, pl;
1015 PARTITION_TYPE partition = PARTITION_NONE;
1016 BLOCK_SIZE subsize;
1017 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
1018 PARTITION_CONTEXT sl[8], sa[8];
1019 int last_part_rate = INT_MAX;
1020 int64_t last_part_dist = INT_MAX;
1021 int split_rate = INT_MAX;
1022 int64_t split_dist = INT_MAX;
1023 int none_rate = INT_MAX;
1024 int64_t none_dist = INT_MAX;
1025 int chosen_rate = INT_MAX;
1026 int64_t chosen_dist = INT_MAX;
1027 BLOCK_SIZE sub_subsize = BLOCK_4X4;
1028 int splits_below = 0;
1029 BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type;
1030
1031 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
1032 return;
1033
1034 partition = partition_lookup[bsl][bs_type];
1035
1036 subsize = get_subsize(bsize, partition);
1037
1038 if (bsize < BLOCK_8X8) {
1039 // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
1040 // there is nothing to be done.
1041 if (x->ab_index != 0) {
1042 *rate = 0;
1043 *dist = 0;
1044 return;
1045 }
1046 } else {
1047 *(get_sb_partitioning(x, bsize)) = subsize;
1048 }
1049 save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1050
1051 if (bsize == BLOCK_16X16) {
1052 set_offsets(cpi, tile, mi_row, mi_col, bsize);
1053 x->mb_energy = vp9_block_energy(cpi, x, bsize);
1054 }
1055
1056 x->fast_ms = 0;
1057 x->subblock_ref = 0;
1058
1059 if (cpi->sf.adjust_partitioning_from_last_frame) {
1060 // Check if any of the sub blocks are further split.
1061 if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
1062 sub_subsize = get_subsize(subsize, PARTITION_SPLIT);
1063 splits_below = 1;
1064 for (i = 0; i < 4; i++) {
1065 int jj = i >> 1, ii = i & 0x01;
1066 MODE_INFO * this_mi = mi_8x8[jj * bss * mis + ii * bss];
1067 if (this_mi && this_mi->mbmi.sb_type >= sub_subsize) {
1068 splits_below = 0;
1069 }
1070 }
1071 }
1072
1073 // If the partition is not none, try none, unless each of the 4 splits is
1074 // split even further.
1075 if (partition != PARTITION_NONE && !splits_below &&
1076 mi_row + (ms >> 1) < cm->mi_rows &&
1077 mi_col + (ms >> 1) < cm->mi_cols) {
1078 *(get_sb_partitioning(x, bsize)) = bsize;
1079 pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rate, &none_dist, bsize,
1080 get_block_context(x, bsize), INT64_MAX);
1081
1082 pl = partition_plane_context(cpi->above_seg_context,
1083 cpi->left_seg_context,
1084 mi_row, mi_col, bsize);
1085 none_rate += x->partition_cost[pl][PARTITION_NONE];
1086
1087 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1088 mi_8x8[0]->mbmi.sb_type = bs_type;
1089 *(get_sb_partitioning(x, bsize)) = subsize;
1090 }
1091 }
1092
1093 switch (partition) {
1094 case PARTITION_NONE:
1095 pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, &last_part_dist,
1096 bsize, get_block_context(x, bsize), INT64_MAX);
1097 break;
1098 case PARTITION_HORZ:
1099 *get_sb_index(x, subsize) = 0;
1100 pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, &last_part_dist,
1101 subsize, get_block_context(x, subsize), INT64_MAX);
1102 if (last_part_rate != INT_MAX &&
1103 bsize >= BLOCK_8X8 && mi_row + (mh >> 1) < cm->mi_rows) {
1104 int rt = 0;
1105 int64_t dt = 0;
1106 update_state(cpi, get_block_context(x, subsize), subsize, 0);
1107 encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
1108 *get_sb_index(x, subsize) = 1;
1109 pick_sb_modes(cpi, tile, mi_row + (ms >> 1), mi_col, &rt, &dt, subsize,
1110 get_block_context(x, subsize), INT64_MAX);
1111 if (rt == INT_MAX || dt == INT_MAX) {
1112 last_part_rate = INT_MAX;
1113 last_part_dist = INT_MAX;
1114 break;
1115 }
1116
1117 last_part_rate += rt;
1118 last_part_dist += dt;
1119 }
1120 break;
1121 case PARTITION_VERT:
1122 *get_sb_index(x, subsize) = 0;
1123 pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rate, &last_part_dist,
1124 subsize, get_block_context(x, subsize), INT64_MAX);
1125 if (last_part_rate != INT_MAX &&
1126 bsize >= BLOCK_8X8 && mi_col + (ms >> 1) < cm->mi_cols) {
1127 int rt = 0;
1128 int64_t dt = 0;
1129 update_state(cpi, get_block_context(x, subsize), subsize, 0);
1130 encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
1131 *get_sb_index(x, subsize) = 1;
1132 pick_sb_modes(cpi, tile, mi_row, mi_col + (ms >> 1), &rt, &dt, subsize,
1133 get_block_context(x, subsize), INT64_MAX);
1134 if (rt == INT_MAX || dt == INT_MAX) {
1135 last_part_rate = INT_MAX;
1136 last_part_dist = INT_MAX;
1137 break;
1138 }
1139 last_part_rate += rt;
1140 last_part_dist += dt;
1141 }
1142 break;
1143 case PARTITION_SPLIT:
1144 // Split partition.
1145 last_part_rate = 0;
1146 last_part_dist = 0;
1147 for (i = 0; i < 4; i++) {
1148 int x_idx = (i & 1) * (ms >> 1);
1149 int y_idx = (i >> 1) * (ms >> 1);
1150 int jj = i >> 1, ii = i & 0x01;
1151 int rt;
1152 int64_t dt;
1153
1154 if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
1155 continue;
1156
1157 *get_sb_index(x, subsize) = i;
1158
1159 rd_use_partition(cpi, tile, mi_8x8 + jj * bss * mis + ii * bss, tp,
1160 mi_row + y_idx, mi_col + x_idx, subsize, &rt, &dt,
1161 i != 3);
1162 if (rt == INT_MAX || dt == INT_MAX) {
1163 last_part_rate = INT_MAX;
1164 last_part_dist = INT_MAX;
1165 break;
1166 }
1167 last_part_rate += rt;
1168 last_part_dist += dt;
1169 }
1170 break;
1171 default:
1172 assert(0);
1173 }
1174
1175 pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
1176 mi_row, mi_col, bsize);
1177 if (last_part_rate < INT_MAX)
1178 last_part_rate += x->partition_cost[pl][partition];
1179
1180 if (cpi->sf.adjust_partitioning_from_last_frame
1181 && partition != PARTITION_SPLIT && bsize > BLOCK_8X8
1182 && (mi_row + ms < cm->mi_rows || mi_row + (ms >> 1) == cm->mi_rows)
1183 && (mi_col + ms < cm->mi_cols || mi_col + (ms >> 1) == cm->mi_cols)) {
1184 BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT);
1185 split_rate = 0;
1186 split_dist = 0;
1187 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1188
1189 // Split partition.
1190 for (i = 0; i < 4; i++) {
1191 int x_idx = (i & 1) * (num_4x4_blocks_wide >> 2);
1192 int y_idx = (i >> 1) * (num_4x4_blocks_high >> 2);
1193 int rt = 0;
1194 int64_t dt = 0;
1195 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
1196 PARTITION_CONTEXT sl[8], sa[8];
1197
1198 if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
1199 continue;
1200
1201 *get_sb_index(x, split_subsize) = i;
1202 *get_sb_partitioning(x, bsize) = split_subsize;
1203 *get_sb_partitioning(x, split_subsize) = split_subsize;
1204
1205 save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1206
1207 pick_sb_modes(cpi, tile, mi_row + y_idx, mi_col + x_idx, &rt, &dt,
1208 split_subsize, get_block_context(x, split_subsize),
1209 INT64_MAX);
1210
1211 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1212
1213 if (rt == INT_MAX || dt == INT_MAX) {
1214 split_rate = INT_MAX;
1215 split_dist = INT_MAX;
1216 break;
1217 }
1218
1219 if (i != 3)
1220 encode_sb(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, 0,
1221 split_subsize);
1222
1223 split_rate += rt;
1224 split_dist += dt;
1225 pl = partition_plane_context(cpi->above_seg_context,
1226 cpi->left_seg_context,
1227 mi_row + y_idx, mi_col + x_idx, bsize);
1228 split_rate += x->partition_cost[pl][PARTITION_NONE];
1229 }
1230 pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
1231 mi_row, mi_col, bsize);
1232 if (split_rate < INT_MAX) {
1233 split_rate += x->partition_cost[pl][PARTITION_SPLIT];
1234
1235 chosen_rate = split_rate;
1236 chosen_dist = split_dist;
1237 }
1238 }
1239
1240 // If last_part is better, set the partitioning to that...
1241 if (RDCOST(x->rdmult, x->rddiv, last_part_rate, last_part_dist)
1242 < RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist)) {
1243 mi_8x8[0]->mbmi.sb_type = bsize;
1244 if (bsize >= BLOCK_8X8)
1245 *(get_sb_partitioning(x, bsize)) = subsize;
1246 chosen_rate = last_part_rate;
1247 chosen_dist = last_part_dist;
1248 }
1249 // If none was better, set the partitioning to that...
1250 if (RDCOST(x->rdmult, x->rddiv, chosen_rate, chosen_dist)
1251 > RDCOST(x->rdmult, x->rddiv, none_rate, none_dist)) {
1252 if (bsize >= BLOCK_8X8)
1253 *(get_sb_partitioning(x, bsize)) = bsize;
1254 chosen_rate = none_rate;
1255 chosen_dist = none_dist;
1256 }
1257
1258 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1259
1260 // We must have chosen a partitioning and encoding or we'll fail later on.
1261 // No other opportunities for success.
1262 if (bsize == BLOCK_64X64)
1263 assert(chosen_rate < INT_MAX && chosen_dist < INT_MAX);
1264
1265 if (do_recon)
1266 encode_sb(cpi, tile, tp, mi_row, mi_col, bsize == BLOCK_64X64, bsize);
1267
1268 *rate = chosen_rate;
1269 *dist = chosen_dist;
1270 }
1271
1272 static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {
1273 BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4,
1274 BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8,
1275 BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16
1276 };
1277
1278 static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = {
1279 BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,
1280 BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, BLOCK_64X64,
1281 BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64
1282 };
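/* These tables give one step of leeway around the observed range. For
 * example, reading the tables above, an SB64 whose neighbourhood used
 * only BLOCK_16X16 gets
 *   min_partition_size[BLOCK_16X16] == BLOCK_8X8
 *   max_partition_size[BLOCK_16X16] == BLOCK_32X32
 * so the search may go one level finer and one level coarser than what
 * was actually seen.
 */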
1283
1284 // Look at all the mode_info entries for blocks that are part of this
1285 // partition and find the min and max values for sb_type.
1286 // At the moment this is designed to work on a 64x64 SB but could be
1287 // adjusted to use a size parameter.
1288 //
1289 // The min and max are assumed to have been initialized prior to calling this
1290 // function so repeat calls can accumulate a min and max of more than one sb64.
1291 static void get_sb_partition_size_range(VP9_COMP *cpi, MODE_INFO ** mi_8x8,
1292 BLOCK_SIZE * min_block_size,
1293 BLOCK_SIZE * max_block_size ) {
1294 MACROBLOCKD *const xd = &cpi->mb.e_mbd;
1295 int sb_width_in_blocks = MI_BLOCK_SIZE;
1296 int sb_height_in_blocks = MI_BLOCK_SIZE;
1297 int i, j;
1298 int index = 0;
1299
1300 // Check the sb_type for each block that belongs to this region.
1301 for (i = 0; i < sb_height_in_blocks; ++i) {
1302 for (j = 0; j < sb_width_in_blocks; ++j) {
1303 MODE_INFO * mi = mi_8x8[index+j];
1304 BLOCK_SIZE sb_type = mi ? mi->mbmi.sb_type : 0;
1305 *min_block_size = MIN(*min_block_size, sb_type);
1306 *max_block_size = MAX(*max_block_size, sb_type);
1307 }
1308 index += xd->mode_info_stride;
1309 }
1310 }
1311
1312 // Look at neighboring blocks and set a min and max partition size based on
1313 // what they chose.
1314 static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
1315 int row, int col,
1316 BLOCK_SIZE *min_block_size,
1317 BLOCK_SIZE *max_block_size) {
1318 VP9_COMMON * const cm = &cpi->common;
1319 MACROBLOCKD *const xd = &cpi->mb.e_mbd;
1320 MODE_INFO ** mi_8x8 = xd->mi_8x8;
1321 MODE_INFO ** prev_mi_8x8 = xd->prev_mi_8x8;
1322
1323 const int left_in_image = xd->left_available && mi_8x8[-1];
1324 const int above_in_image = xd->up_available &&
1325 mi_8x8[-xd->mode_info_stride];
1326 MODE_INFO ** above_sb64_mi_8x8;
1327 MODE_INFO ** left_sb64_mi_8x8;
1328
1329 int row8x8_remaining = tile->mi_row_end - row;
1330 int col8x8_remaining = tile->mi_col_end - col;
1331 int bh, bw;
1332
1333 // Trap case where we do not have a prediction.
1334 if (!left_in_image && !above_in_image &&
1335 ((cm->frame_type == KEY_FRAME) || !cm->prev_mi)) {
1336 *min_block_size = BLOCK_4X4;
1337 *max_block_size = BLOCK_64X64;
1338 } else {
1339 // Default "min to max" and "max to min"
1340 *min_block_size = BLOCK_64X64;
1341 *max_block_size = BLOCK_4X4;
1342
1343 // NOTE: each call to get_sb_partition_size_range() uses the previous
1344 // passed in values for min and max as a starting point.
1345 //
1346 // Find the min and max partition used in previous frame at this location
1347 if (cm->prev_mi && (cm->frame_type != KEY_FRAME)) {
1348 get_sb_partition_size_range(cpi, prev_mi_8x8,
1349 min_block_size, max_block_size);
1350 }
1351
1352 // Find the min and max partition sizes used in the left SB64
1353 if (left_in_image) {
1354 left_sb64_mi_8x8 = &mi_8x8[-MI_BLOCK_SIZE];
1355 get_sb_partition_size_range(cpi, left_sb64_mi_8x8,
1356 min_block_size, max_block_size);
1357 }
1358
1359 // Find the min and max partition sizes used in the above SB64.
1360 if (above_in_image) {
1361 above_sb64_mi_8x8 = &mi_8x8[-xd->mode_info_stride * MI_BLOCK_SIZE];
1362 get_sb_partition_size_range(cpi, above_sb64_mi_8x8,
1363 min_block_size, max_block_size);
1364 }
1365 }
1366
1367 // Give a bit of leeway either side of the observed min and max.
1368 *min_block_size = min_partition_size[*min_block_size];
1369 *max_block_size = max_partition_size[*max_block_size];
1370
1371 // Check border cases where max and min from neighbours may not be legal.
1372 *max_block_size = find_partition_size(*max_block_size,
1373 row8x8_remaining, col8x8_remaining,
1374 &bh, &bw);
1375 *min_block_size = MIN(*min_block_size, *max_block_size);
1376 }
1377
1378 static void compute_fast_motion_search_level(VP9_COMP *cpi, BLOCK_SIZE bsize) {
1379 VP9_COMMON *const cm = &cpi->common;
1380 MACROBLOCK *const x = &cpi->mb;
1381
1382 // Only use 8x8 result for non HD videos.
1383 // int use_8x8 = (MIN(cpi->common.width, cpi->common.height) < 720) ? 1 : 0;
1384 int use_8x8 = 1;
1385
1386 if (cm->frame_type && !cpi->is_src_frame_alt_ref &&
1387 ((use_8x8 && bsize == BLOCK_16X16) ||
1388 bsize == BLOCK_32X32 || bsize == BLOCK_64X64)) {
1389 int ref0 = 0, ref1 = 0, ref2 = 0, ref3 = 0;
1390 PICK_MODE_CONTEXT *block_context = NULL;
1391
1392 if (bsize == BLOCK_16X16) {
1393 block_context = x->sb8x8_context[x->sb_index][x->mb_index];
1394 } else if (bsize == BLOCK_32X32) {
1395 block_context = x->mb_context[x->sb_index];
1396 } else if (bsize == BLOCK_64X64) {
1397 block_context = x->sb32_context;
1398 }
1399
1400 if (block_context) {
1401 ref0 = block_context[0].mic.mbmi.ref_frame[0];
1402 ref1 = block_context[1].mic.mbmi.ref_frame[0];
1403 ref2 = block_context[2].mic.mbmi.ref_frame[0];
1404 ref3 = block_context[3].mic.mbmi.ref_frame[0];
1405 }
1406
1407 // Currently, only consider 4 inter reference frames.
1408 if (ref0 && ref1 && ref2 && ref3) {
1409 int d01, d23, d02, d13;
1410
1411 // Motion vectors for the four subblocks.
1412 int16_t mvr0 = block_context[0].mic.mbmi.mv[0].as_mv.row;
1413 int16_t mvc0 = block_context[0].mic.mbmi.mv[0].as_mv.col;
1414 int16_t mvr1 = block_context[1].mic.mbmi.mv[0].as_mv.row;
1415 int16_t mvc1 = block_context[1].mic.mbmi.mv[0].as_mv.col;
1416 int16_t mvr2 = block_context[2].mic.mbmi.mv[0].as_mv.row;
1417 int16_t mvc2 = block_context[2].mic.mbmi.mv[0].as_mv.col;
1418 int16_t mvr3 = block_context[3].mic.mbmi.mv[0].as_mv.row;
1419 int16_t mvc3 = block_context[3].mic.mbmi.mv[0].as_mv.col;
1420
1421 // Adjust sign if ref is alt_ref.
1422 if (cm->ref_frame_sign_bias[ref0]) {
1423 mvr0 *= -1;
1424 mvc0 *= -1;
1425 }
1426
1427 if (cm->ref_frame_sign_bias[ref1]) {
1428 mvr1 *= -1;
1429 mvc1 *= -1;
1430 }
1431
1432 if (cm->ref_frame_sign_bias[ref2]) {
1433 mvr2 *= -1;
1434 mvc2 *= -1;
1435 }
1436
1437 if (cm->ref_frame_sign_bias[ref3]) {
1438 mvr3 *= -1;
1439 mvc3 *= -1;
1440 }
1441
1442 // Calculate mv distances.
1443 d01 = MAX(abs(mvr0 - mvr1), abs(mvc0 - mvc1));
1444 d23 = MAX(abs(mvr2 - mvr3), abs(mvc2 - mvc3));
1445 d02 = MAX(abs(mvr0 - mvr2), abs(mvc0 - mvc2));
1446 d13 = MAX(abs(mvr1 - mvr3), abs(mvc1 - mvc3));
1447
1448 if (d01 < FAST_MOTION_MV_THRESH && d23 < FAST_MOTION_MV_THRESH &&
1449 d02 < FAST_MOTION_MV_THRESH && d13 < FAST_MOTION_MV_THRESH) {
1450 // Set fast motion search level.
1451 x->fast_ms = 1;
1452
1453 if (ref0 == ref1 && ref1 == ref2 && ref2 == ref3 &&
1454 d01 < 2 && d23 < 2 && d02 < 2 && d13 < 2) {
1455 // Set fast motion search level.
1456 x->fast_ms = 2;
1457
1458 if (!d01 && !d23 && !d02 && !d13) {
1459 x->fast_ms = 3;
1460 x->subblock_ref = ref0;
1461 }
1462 }
1463 }
1464 }
1465 }
1466 }
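/* Summary of the fast_ms levels set above (derived from this function;
 * how callers exploit each level lives elsewhere in the search code):
 *   fast_ms = 1: all four subblock MVs within FAST_MOTION_MV_THRESH
 *                (24 1/8-pel units) of each other
 *   fast_ms = 2: additionally, same reference frame everywhere and all
 *                MV distances < 2
 *   fast_ms = 3: identical MVs; subblock_ref records the shared ref
 * Each level presumably lets the caller prune more of the motion search
 * for the enclosing block.
 */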
1467
1468 static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
1469 vpx_memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
1470 }
1471
1472 static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
1473 vpx_memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
1474 }
1475
1476 // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
1477 // unlikely to be selected depending on previous rate-distortion optimization
1478 // results, for encoding speed-up.
1479 static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
1480 TOKENEXTRA **tp, int mi_row,
1481 int mi_col, BLOCK_SIZE bsize, int *rate,
1482 int64_t *dist, int do_recon, int64_t best_rd) {
1483 VP9_COMMON *const cm = &cpi->common;
1484 MACROBLOCK *const x = &cpi->mb;
1485 const int ms = num_8x8_blocks_wide_lookup[bsize] / 2;
1486 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
1487 PARTITION_CONTEXT sl[8], sa[8];
1488 TOKENEXTRA *tp_orig = *tp;
1489 int i, pl;
1490 BLOCK_SIZE subsize;
1491 int this_rate, sum_rate = 0, best_rate = INT_MAX;
1492 int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX;
1493 int64_t sum_rd = 0;
1494 int do_split = bsize >= BLOCK_8X8;
1495 int do_rect = 1;
1496 // Override skipping rectangular partition operations for edge blocks
1497 const int force_horz_split = (mi_row + ms >= cm->mi_rows);
1498 const int force_vert_split = (mi_col + ms >= cm->mi_cols);
1499
1500 int partition_none_allowed = !force_horz_split && !force_vert_split;
1501 int partition_horz_allowed = !force_vert_split && bsize >= BLOCK_8X8;
1502 int partition_vert_allowed = !force_horz_split && bsize >= BLOCK_8X8;
1503
1504 int partition_split_done = 0;
1505 (void) *tp_orig;
1506
1507 if (bsize < BLOCK_8X8) {
1508 // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
1509 // there is nothing to be done.
1510 if (x->ab_index != 0) {
1511 *rate = 0;
1512 *dist = 0;
1513 return;
1514 }
1515 }
1516 assert(num_8x8_blocks_wide_lookup[bsize] ==
1517 num_8x8_blocks_high_lookup[bsize]);
1518
1519 if (bsize == BLOCK_16X16) {
1520 set_offsets(cpi, tile, mi_row, mi_col, bsize);
1521 x->mb_energy = vp9_block_energy(cpi, x, bsize);
1522 }
1523
1524 // Determine which partition types to search according to the speed features.
1525 // The thresholds set here must be square block sizes.
1526 if (cpi->sf.auto_min_max_partition_size) {
1527 partition_none_allowed &= (bsize <= cpi->sf.max_partition_size &&
1528 bsize >= cpi->sf.min_partition_size);
1529 partition_horz_allowed &= ((bsize <= cpi->sf.max_partition_size &&
1530 bsize > cpi->sf.min_partition_size) ||
1531 force_horz_split);
1532 partition_vert_allowed &= ((bsize <= cpi->sf.max_partition_size &&
1533 bsize > cpi->sf.min_partition_size) ||
1534 force_vert_split);
1535 do_split &= bsize > cpi->sf.min_partition_size;
1536 }
1537 if (cpi->sf.use_square_partition_only) {
1538 partition_horz_allowed &= force_horz_split;
1539 partition_vert_allowed &= force_vert_split;
1540 }
1541
1542 save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1543
1544 if (cpi->sf.disable_split_var_thresh && partition_none_allowed) {
1545 unsigned int source_variancey;
1546 vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
1547 source_variancey = get_sby_perpixel_variance(cpi, x, bsize);
1548 if (source_variancey < cpi->sf.disable_split_var_thresh) {
1549 do_split = 0;
1550 if (source_variancey < cpi->sf.disable_split_var_thresh / 2)
1551 do_rect = 0;
1552 }
1553 }
1554
1555 // PARTITION_NONE
1556 if (partition_none_allowed) {
1557 pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, &this_dist, bsize,
1558 get_block_context(x, bsize), best_rd);
1559 if (this_rate != INT_MAX) {
1560 if (bsize >= BLOCK_8X8) {
1561 pl = partition_plane_context(cpi->above_seg_context,
1562 cpi->left_seg_context,
1563 mi_row, mi_col, bsize);
1564 this_rate += x->partition_cost[pl][PARTITION_NONE];
1565 }
1566 sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
1567 if (sum_rd < best_rd) {
1568 int64_t stop_thresh = 2048;
1569
1570 best_rate = this_rate;
1571 best_dist = this_dist;
1572 best_rd = sum_rd;
1573 if (bsize >= BLOCK_8X8)
1574 *(get_sb_partitioning(x, bsize)) = bsize;
1575
1576 // Adjust threshold according to partition size.
1577 stop_thresh >>= 8 - (b_width_log2_lookup[bsize] +
1578 b_height_log2_lookup[bsize]);
1579
1580 // If obtained distortion is very small, choose current partition
1581 // and stop splitting.
1582 if (this_dist < stop_thresh) {
1583 do_split = 0;
1584 do_rect = 0;
1585 }
1586 }
1587 }
1588 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1589 }
1590
1591 // store estimated motion vector
1592 if (cpi->sf.adaptive_motion_search)
1593 store_pred_mv(x, get_block_context(x, bsize));
1594
1595 // PARTITION_SPLIT
1596 sum_rd = 0;
1597 // TODO(jingning): use the motion vectors given by the above search as
1598 // the starting point of motion search in the following partition type check.
1599 if (do_split) {
1600 subsize = get_subsize(bsize, PARTITION_SPLIT);
1601 for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
1602 const int x_idx = (i & 1) * ms;
1603 const int y_idx = (i >> 1) * ms;
1604
1605 if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
1606 continue;
1607
1608 *get_sb_index(x, subsize) = i;
1609 if (cpi->sf.adaptive_motion_search)
1610 load_pred_mv(x, get_block_context(x, bsize));
1611 rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, subsize,
1612 &this_rate, &this_dist, i != 3, best_rd - sum_rd);
1613
1614 if (this_rate == INT_MAX) {
1615 sum_rd = INT64_MAX;
1616 } else {
1617 sum_rate += this_rate;
1618 sum_dist += this_dist;
1619 sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
1620 }
1621 }
1622 if (sum_rd < best_rd && i == 4) {
1623 pl = partition_plane_context(cpi->above_seg_context,
1624 cpi->left_seg_context,
1625 mi_row, mi_col, bsize);
1626 sum_rate += x->partition_cost[pl][PARTITION_SPLIT];
1627 sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
1628 if (sum_rd < best_rd) {
1629 best_rate = sum_rate;
1630 best_dist = sum_dist;
1631 best_rd = sum_rd;
1632 *(get_sb_partitioning(x, bsize)) = subsize;
1633 }
1634 } else {
1635 // skip rectangular partition test when larger block size
1636 // gives better rd cost
1637 if (cpi->sf.less_rectangular_check)
1638 do_rect &= !partition_none_allowed;
1639 }
1640 partition_split_done = 1;
1641 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1642 }
1643
1644 x->fast_ms = 0;
1645 x->subblock_ref = 0;
1646
1647 if (partition_split_done &&
1648 cpi->sf.using_small_partition_info) {
1649 compute_fast_motion_search_level(cpi, bsize);
1650 }
1651
1652 // PARTITION_HORZ
1653 if (partition_horz_allowed && do_rect) {
1654 subsize = get_subsize(bsize, PARTITION_HORZ);
1655 *get_sb_index(x, subsize) = 0;
1656 if (cpi->sf.adaptive_motion_search)
1657 load_pred_mv(x, get_block_context(x, bsize));
1658 pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
1659 get_block_context(x, subsize), best_rd);
1660 sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
1661
1662 if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) {
1663 update_state(cpi, get_block_context(x, subsize), subsize, 0);
1664 encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
1665
1666 *get_sb_index(x, subsize) = 1;
1667 if (cpi->sf.adaptive_motion_search)
1668 load_pred_mv(x, get_block_context(x, bsize));
1669 pick_sb_modes(cpi, tile, mi_row + ms, mi_col, &this_rate,
1670 &this_dist, subsize, get_block_context(x, subsize),
1671 best_rd - sum_rd);
1672 if (this_rate == INT_MAX) {
1673 sum_rd = INT64_MAX;
1674 } else {
1675 sum_rate += this_rate;
1676 sum_dist += this_dist;
1677 sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
1678 }
1679 }
1680 if (sum_rd < best_rd) {
1681 pl = partition_plane_context(cpi->above_seg_context,
1682 cpi->left_seg_context,
1683 mi_row, mi_col, bsize);
1684 sum_rate += x->partition_cost[pl][PARTITION_HORZ];
1685 sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
1686 if (sum_rd < best_rd) {
1687 best_rd = sum_rd;
1688 best_rate = sum_rate;
1689 best_dist = sum_dist;
1690 *(get_sb_partitioning(x, bsize)) = subsize;
1691 }
1692 }
1693 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1694 }
1695
1696 // PARTITION_VERT
1697 if (partition_vert_allowed && do_rect) {
1698 subsize = get_subsize(bsize, PARTITION_VERT);
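// Mirrors the PARTITION_HORZ case above, splitting into left/right
// columns instead of top/bottom rows.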
1699
1700 *get_sb_index(x, subsize) = 0;
1701 if (cpi->sf.adaptive_motion_search)
1702 load_pred_mv(x, get_block_context(x, bsize));
1703 pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize,
1704 get_block_context(x, subsize), best_rd);
1705 sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
1706 if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) {
1707 update_state(cpi, get_block_context(x, subsize), subsize, 0);
1708 encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize);
1709
1710 *get_sb_index(x, subsize) = 1;
1711 if (cpi->sf.adaptive_motion_search)
1712 load_pred_mv(x, get_block_context(x, bsize));
1713 pick_sb_modes(cpi, tile, mi_row, mi_col + ms, &this_rate,
1714 &this_dist, subsize, get_block_context(x, subsize),
1715 best_rd - sum_rd);
1716 if (this_rate == INT_MAX) {
1717 sum_rd = INT64_MAX;
1718 } else {
1719 sum_rate += this_rate;
1720 sum_dist += this_dist;
1721 sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
1722 }
1723 }
1724 if (sum_rd < best_rd) {
1725 pl = partition_plane_context(cpi->above_seg_context,
1726 cpi->left_seg_context,
1727 mi_row, mi_col, bsize);
1728 sum_rate += x->partition_cost[pl][PARTITION_VERT];
1729 sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
1730 if (sum_rd < best_rd) {
1731 best_rate = sum_rate;
1732 best_dist = sum_dist;
1733 best_rd = sum_rd;
1734 *(get_sb_partitioning(x, bsize)) = subsize;
1735 }
1736 }
1737 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
1738 }
1739
1740
1741 *rate = best_rate;
1742 *dist = best_dist;
1743
1744 if (best_rate < INT_MAX && best_dist < INT64_MAX && do_recon)
1745 encode_sb(cpi, tile, tp, mi_row, mi_col, bsize == BLOCK_64X64, bsize);
1746 if (bsize == BLOCK_64X64) {
1747 assert(tp_orig < *tp);
1748 assert(best_rate < INT_MAX);
1749 assert(best_dist < INT64_MAX);
1750 } else {
1751 assert(tp_orig == *tp);
1752 }
1753 }
1754
1755 // Examines the 64x64 block and chooses the best reference frame.
1756 static void rd_pick_reference_frame(VP9_COMP *cpi, const TileInfo *const tile,
1757 int mi_row, int mi_col) {
1758 VP9_COMMON * const cm = &cpi->common;
1759 MACROBLOCK * const x = &cpi->mb;
1760 int bsl = b_width_log2(BLOCK_64X64), bs = 1 << bsl;
1761 int ms = bs / 2;
1762 ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
1763 PARTITION_CONTEXT sl[8], sa[8];
1764 int pl;
1765 int r;
1766 int64_t d;
1767
1768 save_context(cpi, mi_row, mi_col, a, l, sa, sl, BLOCK_64X64);
1769
1770 // Default is no mask (all reference frames allowed).
1771 cpi->ref_frame_mask = 0;
1772
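// The 64x64 mode search below runs with set_ref_frame_mask enabled so
// that the rd code can record unpromising references in
// cpi->ref_frame_mask (the masking itself lives in the rd loop).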
1773 // Do RD search for 64x64.
1774 if ((mi_row + (ms >> 1) < cm->mi_rows) &&
1775 (mi_col + (ms >> 1) < cm->mi_cols)) {
1776 cpi->set_ref_frame_mask = 1;
1777 pick_sb_modes(cpi, tile, mi_row, mi_col, &r, &d, BLOCK_64X64,
1778 get_block_context(x, BLOCK_64X64), INT64_MAX);
1779 pl = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context,
1780 mi_row, mi_col, BLOCK_64X64);
1781 r += x->partition_cost[pl][PARTITION_NONE];
1782
1783 *(get_sb_partitioning(x, BLOCK_64X64)) = BLOCK_64X64;
1784 cpi->set_ref_frame_mask = 0;
1785 }
1786
1787 restore_context(cpi, mi_row, mi_col, a, l, sa, sl, BLOCK_64X64);
1788 }
1789
1790 static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
1791 int mi_row, TOKENEXTRA **tp) {
1792 VP9_COMMON * const cm = &cpi->common;
1793 int mi_col;
1794
1795 // Initialize the left context for the new SB row
1796 vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context));
1797 vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context));
1798
1799 // Code each SB in the row
1800 for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
1801 mi_col += MI_BLOCK_SIZE) {
1802 int dummy_rate;
1803 int64_t dummy_dist;
1804
1805 vp9_zero(cpi->mb.pred_mv);
1806
1807 if (cpi->sf.reference_masking)
1808 rd_pick_reference_frame(cpi, tile, mi_row, mi_col);
1809
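// Three partitioning strategies: a single fixed partition size, reuse of
// the previous frame's partitioning (periodically redone, and always
// redone for key frames, hidden frames, alt-ref overlays or detected
// motion), or a full RD partition search.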
1810 if (cpi->sf.use_lastframe_partitioning ||
1811 cpi->sf.use_one_partition_size_always) {
1812 const int idx_str = cm->mode_info_stride * mi_row + mi_col;
1813 MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
1814 MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
1815
1816 cpi->mb.source_variance = UINT_MAX;
1817 if (cpi->sf.use_one_partition_size_always) {
1818 set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
1819 set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col);
1820 rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
1821 &dummy_rate, &dummy_dist, 1);
1822 } else {
1823 if ((cpi->common.current_video_frame
1824 % cpi->sf.last_partitioning_redo_frequency) == 0
1825 || cm->prev_mi == 0
1826 || cpi->common.show_frame == 0
1827 || cpi->common.frame_type == KEY_FRAME
1828 || cpi->is_src_frame_alt_ref
1829 || ((cpi->sf.use_lastframe_partitioning ==
1830 LAST_FRAME_PARTITION_LOW_MOTION) &&
1831 sb_has_motion(cpi, prev_mi_8x8))) {
1832 // If required, set upper and lower partition size limits.
1833 if (cpi->sf.auto_min_max_partition_size) {
1834 set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
1835 rd_auto_partition_range(cpi, tile, mi_row, mi_col,
1836 &cpi->sf.min_partition_size,
1837 &cpi->sf.max_partition_size);
1838 }
1839 rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
1840 &dummy_rate, &dummy_dist, 1, INT64_MAX);
1841 } else {
1842 copy_partitioning(cpi, mi_8x8, prev_mi_8x8);
1843 rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
1844 &dummy_rate, &dummy_dist, 1);
1845 }
1846 }
1847 } else {
1848 // If required, set upper and lower partition size limits.
1849 if (cpi->sf.auto_min_max_partition_size) {
1850 set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
1851 rd_auto_partition_range(cpi, tile, mi_row, mi_col,
1852 &cpi->sf.min_partition_size,
1853 &cpi->sf.max_partition_size);
1854 }
1855 rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
1856 &dummy_rate, &dummy_dist, 1, INT64_MAX);
1857 }
1858 }
1859 }
1860
1861 static void init_encode_frame_mb_context(VP9_COMP *cpi) {
1862 MACROBLOCK *const x = &cpi->mb;
1863 VP9_COMMON *const cm = &cpi->common;
1864 MACROBLOCKD *const xd = &x->e_mbd;
1865 const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
1866
1867 x->act_zbin_adj = 0;
1868 cpi->seg0_idx = 0;
1869
1870 xd->mode_info_stride = cm->mode_info_stride;
1871
1872 // reset intra mode contexts
1873 if (frame_is_intra_only(cm))
1874 vp9_init_mbmode_probs(cm);
1875
1876 // Copy data over into macro block data structures.
1877 vp9_setup_src_planes(x, cpi->Source, 0, 0);
1878
1879 // TODO(jkoleszar): are these initializations required?
1880 setup_pre_planes(xd, 0, &cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]],
1881 0, 0, NULL);
1882 setup_dst_planes(xd, get_frame_new_buffer(cm), 0, 0);
1883
1884 setup_block_dptrs(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
1885
1886 xd->mi_8x8[0]->mbmi.mode = DC_PRED;
1887 xd->mi_8x8[0]->mbmi.uv_mode = DC_PRED;
1888
1889 vp9_zero(cpi->y_mode_count);
1890 vp9_zero(cpi->y_uv_mode_count);
1891 vp9_zero(cm->counts.inter_mode);
1892 vp9_zero(cpi->partition_count);
1893 vp9_zero(cpi->intra_inter_count);
1894 vp9_zero(cpi->comp_inter_count);
1895 vp9_zero(cpi->single_ref_count);
1896 vp9_zero(cpi->comp_ref_count);
1897 vp9_zero(cm->counts.tx);
1898 vp9_zero(cm->counts.mbskip);
1899
1900 // Note: this memset assumes above_context[0], [1] and [2]
1901 // are allocated as part of the same buffer.
1902 vpx_memset(cpi->above_context[0], 0,
1903 sizeof(*cpi->above_context[0]) *
1904 2 * aligned_mi_cols * MAX_MB_PLANE);
1905 vpx_memset(cpi->above_seg_context, 0,
1906 sizeof(*cpi->above_seg_context) * aligned_mi_cols);
1907 }
1908
1909 static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
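// Lossless coding swaps in the 4x4 Walsh-Hadamard transform pair
// (vp9_fwht4x4 / vp9_iwht4x4_add), which round-trips exactly, and
// disables everything that could perturb the reconstruction:
// coefficient optimization, loop filtering, zbin boosts and all larger
// transform sizes.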
1910 if (lossless) {
1911 // printf("Switching to lossless\n");
1912 cpi->mb.fwd_txm4x4 = vp9_fwht4x4;
1913 cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add;
1914 cpi->mb.optimize = 0;
1915 cpi->common.lf.filter_level = 0;
1916 cpi->zbin_mode_boost_enabled = 0;
1917 cpi->common.tx_mode = ONLY_4X4;
1918 } else {
1919 // printf("Not lossless\n");
1920 cpi->mb.fwd_txm4x4 = vp9_fdct4x4;
1921 cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add;
1922 }
1923 }
1924
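// USE_LARGESTALL never searches per-block transform sizes in the RD
// loop, so TX_MODE_SELECT is demoted to ALLOW_32X32 up front.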
1925 static void switch_tx_mode(VP9_COMP *cpi) {
1926 if (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
1927 cpi->common.tx_mode >= ALLOW_32X32)
1928 cpi->common.tx_mode = ALLOW_32X32;
1929 }
1930
1931 static void encode_frame_internal(VP9_COMP *cpi) {
1932 int mi_row;
1933 MACROBLOCK * const x = &cpi->mb;
1934 VP9_COMMON * const cm = &cpi->common;
1935 MACROBLOCKD * const xd = &x->e_mbd;
1936
1937 // fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
1938 // cpi->common.current_video_frame, cpi->common.show_frame,
1939 // cm->frame_type);
1940
1941 // debug output
1942 #if DBG_PRNT_SEGMAP
1943 {
1944 FILE *statsfile;
1945 statsfile = fopen("segmap2.stt", "a");
1946 fprintf(statsfile, "\n");
1947 fclose(statsfile);
1948 }
1949 #endif
1950
1951 vp9_zero(cm->counts.switchable_interp);
1952 vp9_zero(cpi->tx_stepdown_count);
1953
1954 xd->mi_8x8 = cm->mi_grid_visible;
1955 // required for vp9_frame_init_quantizer
1956 xd->mi_8x8[0] = cm->mi;
1957
1958 xd->last_mi = cm->prev_mi;
1959
1960 vp9_zero(cpi->NMVcount);
1961 vp9_zero(cpi->coef_counts);
1962 vp9_zero(cm->counts.eob_branch);
1963
1964 cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0
1965 && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
1966 switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless);
1967
1968 vp9_frame_init_quantizer(cpi);
1969
1970 vp9_initialize_rd_consts(cpi);
1971 vp9_initialize_me_consts(cpi, cm->base_qindex);
1972 switch_tx_mode(cpi);
1973
1974 if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
1975 // Initialize encode frame context.
1976 init_encode_frame_mb_context(cpi);
1977
1978 // Build a frame level activity map
1979 build_activity_map(cpi);
1980 }
1981
1982 // Re-initialize encode frame context.
1983 init_encode_frame_mb_context(cpi);
1984
1985 vp9_zero(cpi->rd_comp_pred_diff);
1986 vp9_zero(cpi->rd_filter_diff);
1987 vp9_zero(cpi->rd_tx_select_diff);
1988 vp9_zero(cpi->rd_tx_select_threshes);
1989
1990 set_prev_mi(cm);
1991
1992 {
1993 struct vpx_usec_timer emr_timer;
1994 vpx_usec_timer_start(&emr_timer);
1995
1996 {
1997 // Take tiles into account and give each tile its start/end mi range.
1998 int tile_col, tile_row;
1999 TOKENEXTRA *tp = cpi->tok;
2000 const int tile_cols = 1 << cm->log2_tile_cols;
2001 const int tile_rows = 1 << cm->log2_tile_rows;
2002
2003 for (tile_row = 0; tile_row < tile_rows; tile_row++) {
2004 for (tile_col = 0; tile_col < tile_cols; tile_col++) {
2005 TileInfo tile;
2006 TOKENEXTRA *tp_old = tp;
2007
2008 // For each row of SBs in the frame
2009 vp9_tile_init(&tile, cm, tile_row, tile_col);
2010 for (mi_row = tile.mi_row_start;
2011 mi_row < tile.mi_row_end; mi_row += 8)
2012 encode_sb_row(cpi, &tile, mi_row, &tp);
2013
2014 cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
2015 assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
2016 }
2017 }
2018 }
2019
2020 vpx_usec_timer_mark(&emr_timer);
2021 cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
2022 }
2023
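// Allow skipping the final encode pass only when inter blocks outnumber
// intra blocks by more than 4:1, and never on key frames or hidden
// (non-shown) frames.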
2024 if (cpi->sf.skip_encode_sb) {
2025 int j;
2026 unsigned int intra_count = 0, inter_count = 0;
2027 for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
2028 intra_count += cpi->intra_inter_count[j][0];
2029 inter_count += cpi->intra_inter_count[j][1];
2030 }
2031 cpi->sf.skip_encode_frame = ((intra_count << 2) < inter_count);
2032 cpi->sf.skip_encode_frame &= (cm->frame_type != KEY_FRAME);
2033 cpi->sf.skip_encode_frame &= cm->show_frame;
2034 } else {
2035 cpi->sf.skip_encode_frame = 0;
2036 }
2037
2038 #if 0
2039 // Keep record of the total distortion this time around for future use
2040 cpi->last_frame_distortion = cpi->frame_distortion;
2041 #endif
2042 }
2043
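// Returns 1 when at least two of the LAST/GOLDEN/ALTREF references are
// available for this frame (so compound prediction is possible) and the
// segment-level reference-frame feature is inactive.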
2044 static int check_dual_ref_flags(VP9_COMP *cpi) {
2045 const int ref_flags = cpi->ref_frame_flags;
2046
2047 if (vp9_segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) {
2048 return 0;
2049 } else {
2050 return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG)
2051 + !!(ref_flags & VP9_ALT_FLAG)) >= 2;
2052 }
2053 }
2054
2055 static int get_skip_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs) {
2056 int x, y;
2057
2058 for (y = 0; y < ymbs; y++) {
2059 for (x = 0; x < xmbs; x++) {
2060 if (!mi_8x8[y * mis + x]->mbmi.skip_coeff)
2061 return 0;
2062 }
2063 }
2064
2065 return 1;
2066 }
2067
2068 static void set_txfm_flag(MODE_INFO **mi_8x8, int mis, int ymbs, int xmbs,
2069 TX_SIZE tx_size) {
2070 int x, y;
2071
2072 for (y = 0; y < ymbs; y++) {
2073 for (x = 0; x < xmbs; x++)
2074 mi_8x8[y * mis + x]->mbmi.tx_size = tx_size;
2075 }
2076 }
2077
2078 static void reset_skip_txfm_size_b(VP9_COMP *cpi, MODE_INFO **mi_8x8,
2079 int mis, TX_SIZE max_tx_size, int bw, int bh,
2080 int mi_row, int mi_col, BLOCK_SIZE bsize) {
2081 VP9_COMMON * const cm = &cpi->common;
2082
2083 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) {
2084 return;
2085 } else {
2086 MB_MODE_INFO * const mbmi = &mi_8x8[0]->mbmi;
2087 if (mbmi->tx_size > max_tx_size) {
2088 const int ymbs = MIN(bh, cm->mi_rows - mi_row);
2089 const int xmbs = MIN(bw, cm->mi_cols - mi_col);
2090
2091 assert(vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) ||
2092 get_skip_flag(mi_8x8, mis, ymbs, xmbs));
2093 set_txfm_flag(mi_8x8, mis, ymbs, xmbs, max_tx_size);
2094 }
2095 }
2096 }
2097
2098 static void reset_skip_txfm_size_sb(VP9_COMP *cpi, MODE_INFO **mi_8x8,
2099 TX_SIZE max_tx_size, int mi_row, int mi_col,
2100 BLOCK_SIZE bsize) {
2101 VP9_COMMON * const cm = &cpi->common;
2102 const int mis = cm->mode_info_stride;
2103 int bw, bh;
2104 const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2;
2105
2106 if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
2107 return;
2108
2109 bw = num_8x8_blocks_wide_lookup[mi_8x8[0]->mbmi.sb_type];
2110 bh = num_8x8_blocks_high_lookup[mi_8x8[0]->mbmi.sb_type];
2111
2112 if (bw == bs && bh == bs) {
2113 reset_skip_txfm_size_b(cpi, mi_8x8, mis, max_tx_size, bs, bs, mi_row,
2114 mi_col, bsize);
2115 } else if (bw == bs && bh < bs) {
2116 reset_skip_txfm_size_b(cpi, mi_8x8, mis, max_tx_size, bs, hbs, mi_row,
2117 mi_col, bsize);
2118 reset_skip_txfm_size_b(cpi, mi_8x8 + hbs * mis, mis, max_tx_size, bs, hbs,
2119 mi_row + hbs, mi_col, bsize);
2120 } else if (bw < bs && bh == bs) {
2121 reset_skip_txfm_size_b(cpi, mi_8x8, mis, max_tx_size, hbs, bs, mi_row,
2122 mi_col, bsize);
2123 reset_skip_txfm_size_b(cpi, mi_8x8 + hbs, mis, max_tx_size, hbs, bs, mi_row,
2124 mi_col + hbs, bsize);
2125
2126 } else {
2127 const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize];
2128 int n;
2129
2130 assert(bw < bs && bh < bs);
2131
2132 for (n = 0; n < 4; n++) {
2133 const int mi_dc = hbs * (n & 1);
2134 const int mi_dr = hbs * (n >> 1);
2135
2136 reset_skip_txfm_size_sb(cpi, &mi_8x8[mi_dr * mis + mi_dc], max_tx_size,
2137 mi_row + mi_dr, mi_col + mi_dc, subsize);
2138 }
2139 }
2140 }
2141
2142 static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) {
2143 VP9_COMMON * const cm = &cpi->common;
2144 int mi_row, mi_col;
2145 const int mis = cm->mode_info_stride;
2146 // MODE_INFO *mi, *mi_ptr = cm->mi;
2147 MODE_INFO **mi_8x8, **mi_ptr = cm->mi_grid_visible;
2148
2149 for (mi_row = 0; mi_row < cm->mi_rows; mi_row += 8, mi_ptr += 8 * mis) {
2150 mi_8x8 = mi_ptr;
2151 for (mi_col = 0; mi_col < cm->mi_cols; mi_col += 8, mi_8x8 += 8) {
2152 reset_skip_txfm_size_sb(cpi, mi_8x8, txfm_max, mi_row, mi_col,
2153 BLOCK_64X64);
2154 }
2155 }
2156 }
2157
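// Buckets the frame for the RD-threshold statistics: 0 = intra-only,
// 1 = golden/alt-ref refresh, 2 = normal inter, 3 = alt-ref overlay.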
2158 static int get_frame_type(VP9_COMP *cpi) {
2159 int frame_type;
2160 if (frame_is_intra_only(&cpi->common))
2161 frame_type = 0;
2162 else if (cpi->is_src_frame_alt_ref && cpi->refresh_golden_frame)
2163 frame_type = 3;
2164 else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
2165 frame_type = 1;
2166 else
2167 frame_type = 2;
2168 return frame_type;
2169 }
2170
2171 static void select_tx_mode(VP9_COMP *cpi) {
2172 if (cpi->oxcf.lossless) {
2173 cpi->common.tx_mode = ONLY_4X4;
2174 } else if (cpi->common.current_video_frame == 0) {
2175 cpi->common.tx_mode = TX_MODE_SELECT;
2176 } else {
2177 if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
2178 cpi->common.tx_mode = ALLOW_32X32;
2179 } else if (cpi->sf.tx_size_search_method == USE_FULL_RD) {
2180 int frame_type = get_frame_type(cpi);
2181 cpi->common.tx_mode =
2182 cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32]
2183 > cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ?
2184 ALLOW_32X32 : TX_MODE_SELECT;
2185 } else {
2186 unsigned int total = 0;
2187 int i;
2188 for (i = 0; i < TX_SIZES; ++i)
2189 total += cpi->tx_stepdown_count[i];
2190 if (total) {
2191 double fraction = (double)cpi->tx_stepdown_count[0] / total;
2192 cpi->common.tx_mode = fraction > 0.90 ? ALLOW_32X32 : TX_MODE_SELECT;
2193 // printf("fraction = %f\n", fraction);
2194 } // else keep unchanged
2195 }
2196 }
2197 }
2198
2199 void vp9_encode_frame(VP9_COMP *cpi) {
2200 VP9_COMMON * const cm = &cpi->common;
2201
2202 // In the longer term the encoder should be generalized to match the
2203 // decoder such that we allow compound where one of the 3 buffers has a
2204 // different sign bias and that buffer is then the fixed ref. However, this
2205 // requires further work in the rd loop. For now the only supported encoder
2206 // side behavior is where the ALT ref buffer has opposite sign bias to
2207 // the other two.
2208 if (!frame_is_intra_only(cm)) {
2209 if ((cm->ref_frame_sign_bias[ALTREF_FRAME]
2210 == cm->ref_frame_sign_bias[GOLDEN_FRAME])
2211 || (cm->ref_frame_sign_bias[ALTREF_FRAME]
2212 == cm->ref_frame_sign_bias[LAST_FRAME])) {
2213 cm->allow_comp_inter_inter = 0;
2214 } else {
2215 cm->allow_comp_inter_inter = 1;
2216 cm->comp_fixed_ref = ALTREF_FRAME;
2217 cm->comp_var_ref[0] = LAST_FRAME;
2218 cm->comp_var_ref[1] = GOLDEN_FRAME;
2219 }
2220 }
2221
2222 if (cpi->sf.RD) {
2223 int i, pred_type;
2224 INTERPOLATION_TYPE filter_type;
2225 /*
2226 * This code does a single RD pass over the whole frame assuming
2227 * either compound, single or hybrid prediction as per whatever has
2228 * worked best for that type of frame in the past.
2229 * It also predicts whether another coding mode would have worked
2230 * better than this coding mode. If that is the case, it remembers
2231 * that for subsequent frames.
2232 * It does the same analysis for transform size selection also.
2233 */
2234 int frame_type = get_frame_type(cpi);
2235
2236 /* prediction (compound, single or hybrid) mode selection */
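/* The threshold arrays below are indexed by prediction type; from the
enum order this is taken to be 0 = single, 1 = compound, 2 = hybrid. */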
2237 if (frame_type == 3 || !cm->allow_comp_inter_inter)
2238 pred_type = SINGLE_PREDICTION_ONLY;
2239 else if (cpi->rd_prediction_type_threshes[frame_type][1]
2240 > cpi->rd_prediction_type_threshes[frame_type][0]
2241 && cpi->rd_prediction_type_threshes[frame_type][1]
2242 > cpi->rd_prediction_type_threshes[frame_type][2]
2243 && check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100)
2244 pred_type = COMP_PREDICTION_ONLY;
2245 else if (cpi->rd_prediction_type_threshes[frame_type][0]
2246 > cpi->rd_prediction_type_threshes[frame_type][2])
2247 pred_type = SINGLE_PREDICTION_ONLY;
2248 else
2249 pred_type = HYBRID_PREDICTION;
2250
2251 /* filter type selection */
2252 // FIXME(rbultje) for some odd reason, we often select smooth_filter
2253 // as default filter for ARF overlay frames. This is a REALLY BAD
2254 // IDEA so we explicitly disable it here.
2255 if (frame_type != 3 &&
2256 cpi->rd_filter_threshes[frame_type][1] >
2257 cpi->rd_filter_threshes[frame_type][0] &&
2258 cpi->rd_filter_threshes[frame_type][1] >
2259 cpi->rd_filter_threshes[frame_type][2] &&
2260 cpi->rd_filter_threshes[frame_type][1] >
2261 cpi->rd_filter_threshes[frame_type][SWITCHABLE_FILTERS]) {
2262 filter_type = EIGHTTAP_SMOOTH;
2263 } else if (cpi->rd_filter_threshes[frame_type][2] >
2264 cpi->rd_filter_threshes[frame_type][0] &&
2265 cpi->rd_filter_threshes[frame_type][2] >
2266 cpi->rd_filter_threshes[frame_type][SWITCHABLE_FILTERS]) {
2267 filter_type = EIGHTTAP_SHARP;
2268 } else if (cpi->rd_filter_threshes[frame_type][0] >
2269 cpi->rd_filter_threshes[frame_type][SWITCHABLE_FILTERS]) {
2270 filter_type = EIGHTTAP;
2271 } else {
2272 filter_type = SWITCHABLE;
2273 }
2274
2275 cpi->mb.e_mbd.lossless = 0;
2276 if (cpi->oxcf.lossless) {
2277 cpi->mb.e_mbd.lossless = 1;
2278 }
2279
2280 /* transform size selection (4x4, 8x8, 16x16 or select-per-mb) */
2281 select_tx_mode(cpi);
2282 cpi->common.comp_pred_mode = pred_type;
2283 cpi->common.mcomp_filter_type = filter_type;
2284 encode_frame_internal(cpi);
2285
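// Fold this frame's per-MB RD differences into the running thresholds
// as an exponential moving average: thresh = (thresh + diff) / 2.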
2286 for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
2287 const int diff = (int) (cpi->rd_comp_pred_diff[i] / cpi->common.MBs);
2288 cpi->rd_prediction_type_threshes[frame_type][i] += diff;
2289 cpi->rd_prediction_type_threshes[frame_type][i] >>= 1;
2290 }
2291
2292 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
2293 const int64_t diff = cpi->rd_filter_diff[i] / cpi->common.MBs;
2294 cpi->rd_filter_threshes[frame_type][i] =
2295 (cpi->rd_filter_threshes[frame_type][i] + diff) / 2;
2296 }
2297
2298 for (i = 0; i < TX_MODES; ++i) {
2299 int64_t pd = cpi->rd_tx_select_diff[i];
2300 int diff;
2301 if (i == TX_MODE_SELECT)
2302 pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv,
2303 2048 * (TX_SIZES - 1), 0);
2304 diff = (int) (pd / cpi->common.MBs);
2305 cpi->rd_tx_select_threshes[frame_type][i] += diff;
2306 cpi->rd_tx_select_threshes[frame_type][i] /= 2;
2307 }
2308
2309 if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
2310 int single_count_zero = 0;
2311 int comp_count_zero = 0;
2312
2313 for (i = 0; i < COMP_INTER_CONTEXTS; i++) {
2314 single_count_zero += cpi->comp_inter_count[i][0];
2315 comp_count_zero += cpi->comp_inter_count[i][1];
2316 }
2317
2318 if (comp_count_zero == 0) {
2319 cpi->common.comp_pred_mode = SINGLE_PREDICTION_ONLY;
2320 vp9_zero(cpi->comp_inter_count);
2321 } else if (single_count_zero == 0) {
2322 cpi->common.comp_pred_mode = COMP_PREDICTION_ONLY;
2323 vp9_zero(cpi->comp_inter_count);
2324 }
2325 }
2326
2327 if (cpi->common.tx_mode == TX_MODE_SELECT) {
2328 int count4x4 = 0;
2329 int count8x8_lp = 0, count8x8_8x8p = 0;
2330 int count16x16_16x16p = 0, count16x16_lp = 0;
2331 int count32x32 = 0;
2332
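// Tally transform-size usage. The p8x8/p16x16/p32x32 tables are kept per
// maximum allowed transform size; the *_lp counters record a smaller
// size chosen inside blocks that could have used a larger one, while
// count8x8_8x8p/count16x16_16x16p count blocks capped at exactly that
// size.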
2333 for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
2334 count4x4 += cm->counts.tx.p32x32[i][TX_4X4];
2335 count4x4 += cm->counts.tx.p16x16[i][TX_4X4];
2336 count4x4 += cm->counts.tx.p8x8[i][TX_4X4];
2337
2338 count8x8_lp += cm->counts.tx.p32x32[i][TX_8X8];
2339 count8x8_lp += cm->counts.tx.p16x16[i][TX_8X8];
2340 count8x8_8x8p += cm->counts.tx.p8x8[i][TX_8X8];
2341
2342 count16x16_16x16p += cm->counts.tx.p16x16[i][TX_16X16];
2343 count16x16_lp += cm->counts.tx.p32x32[i][TX_16X16];
2344 count32x32 += cm->counts.tx.p32x32[i][TX_32X32];
2345 }
2346
2347 if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0
2348 && count32x32 == 0) {
2349 cpi->common.tx_mode = ALLOW_8X8;
2350 reset_skip_txfm_size(cpi, TX_8X8);
2351 } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0
2352 && count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) {
2353 cpi->common.tx_mode = ONLY_4X4;
2354 reset_skip_txfm_size(cpi, TX_4X4);
2355 } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) {
2356 cpi->common.tx_mode = ALLOW_32X32;
2357 } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) {
2358 cpi->common.tx_mode = ALLOW_16X16;
2359 reset_skip_txfm_size(cpi, TX_16X16);
2360 }
2361 }
2362 } else {
2363 encode_frame_internal(cpi);
2364 }
2365 }
2366
2367 static void sum_intra_stats(VP9_COMP *cpi, const MODE_INFO *mi) {
2368 const MB_PREDICTION_MODE y_mode = mi->mbmi.mode;
2369 const MB_PREDICTION_MODE uv_mode = mi->mbmi.uv_mode;
2370 const BLOCK_SIZE bsize = mi->mbmi.sb_type;
2371
2372 ++cpi->y_uv_mode_count[y_mode][uv_mode];
2373
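// Sub-8x8 blocks signal a prediction mode per 4x4 sub-block; the loop
// steps skip the duplicated entries of the 4x8/8x4 shapes.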
2374 if (bsize < BLOCK_8X8) {
2375 int idx, idy;
2376 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
2377 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
2378 for (idy = 0; idy < 2; idy += num_4x4_blocks_high)
2379 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide)
2380 ++cpi->y_mode_count[0][mi->bmi[idy * 2 + idx].as_mode];
2381 } else {
2382 ++cpi->y_mode_count[size_group_lookup[bsize]][y_mode];
2383 }
2384 }
2385
2386 // Experimental stub function to create a per MB zbin adjustment based on
2387 // some previously calculated measure of MB activity.
2388 static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x) {
2389 #if USE_ACT_INDEX
2390 x->act_zbin_adj = *(x->mb_activity_ptr);
2391 #else
2392 int64_t a;
2393 int64_t b;
2394 int64_t act = *(x->mb_activity_ptr);
2395
2396 // Apply the masking to the RD multiplier.
2397 a = act + 4 * cpi->activity_avg;
2398 b = 4 * act + cpi->activity_avg;
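// The adjustment below is the rounded ratio of these cross-weighted
// sums: positive (roughly b/a - 1) for busier-than-average MBs and
// negative (roughly 1 - a/b) for flatter ones.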
2399
2400 if (act > cpi->activity_avg)
2401 x->act_zbin_adj = (int) (((int64_t) b + (a >> 1)) / a) - 1;
2402 else
2403 x->act_zbin_adj = 1 - (int) (((int64_t) a + (b >> 1)) / b);
2404 #endif
2405 }
2406 static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
2407 int mi_row, int mi_col, BLOCK_SIZE bsize) {
2408 VP9_COMMON * const cm = &cpi->common;
2409 MACROBLOCK * const x = &cpi->mb;
2410 MACROBLOCKD * const xd = &x->e_mbd;
2411 MODE_INFO **mi_8x8 = xd->mi_8x8;
2412 MODE_INFO *mi = mi_8x8[0];
2413 MB_MODE_INFO *mbmi = &mi->mbmi;
2414 PICK_MODE_CONTEXT *ctx = get_block_context(x, bsize);
2415 unsigned int segment_id = mbmi->segment_id;
2416 const int mis = cm->mode_info_stride;
2417 const int mi_width = num_8x8_blocks_wide_lookup[bsize];
2418 const int mi_height = num_8x8_blocks_high_lookup[bsize];
2419 x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8;
2420 x->skip_optimize = ctx->is_coded;
2421 ctx->is_coded = 1;
2422 x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
2423 x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame &&
2424 x->q_index < QIDX_SKIP_THRESH);
2425 if (x->skip_encode)
2426 return;
2427
2428 if (cm->frame_type == KEY_FRAME) {
2429 if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
2430 adjust_act_zbin(cpi, x);
2431 vp9_update_zbin_extra(cpi, x);
2432 }
2433 } else {
2434 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
2435
2436 if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
2437 // Adjust the zbin based on this MB rate.
2438 adjust_act_zbin(cpi, x);
2439 }
2440
2441 // Experimental code. Special case for gf and arf zeromv modes.
2442 // Increase the zbin size to suppress noise.
2443 cpi->zbin_mode_boost = 0;
2444 if (cpi->zbin_mode_boost_enabled) {
2445 if (is_inter_block(mbmi)) {
2446 if (mbmi->mode == ZEROMV) {
2447 if (mbmi->ref_frame[0] != LAST_FRAME)
2448 cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
2449 else
2450 cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
2451 } else if (mbmi->sb_type < BLOCK_8X8) {
2452 cpi->zbin_mode_boost = SPLIT_MV_ZBIN_BOOST;
2453 } else {
2454 cpi->zbin_mode_boost = MV_ZBIN_BOOST;
2455 }
2456 } else {
2457 cpi->zbin_mode_boost = INTRA_ZBIN_BOOST;
2458 }
2459 }
2460
2461 vp9_update_zbin_extra(cpi, x);
2462 }
2463
2464 if (!is_inter_block(mbmi)) {
2465 vp9_encode_intra_block_y(x, MAX(bsize, BLOCK_8X8));
2466 vp9_encode_intra_block_uv(x, MAX(bsize, BLOCK_8X8));
2467 if (output_enabled)
2468 sum_intra_stats(cpi, mi);
2469 } else {
2470 int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, mbmi->ref_frame[0])];
2471 YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[idx];
2472 YV12_BUFFER_CONFIG *second_ref_fb = NULL;
2473 if (has_second_ref(mbmi)) {
2474 idx = cm->ref_frame_map[get_ref_frame_idx(cpi, mbmi->ref_frame[1])];
2475 second_ref_fb = &cm->yv12_fb[idx];
2476 }
2477
2478 assert(cm->frame_type != KEY_FRAME);
2479
2480 setup_pre_planes(xd, 0, ref_fb, mi_row, mi_col,
2481 &xd->scale_factor[0]);
2482 setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col,
2483 &xd->scale_factor[1]);
2484
2485 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
2486 }
2487
2488 if (!is_inter_block(mbmi)) {
2489 vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8));
2490 } else if (!x->skip) {
2491 vp9_encode_sb(x, MAX(bsize, BLOCK_8X8));
2492 vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8));
2493 } else {
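// All-skip path: the left/above skip flags form the context for the
// skip bit; record the implied skip and clear the entropy contexts so
// neighbouring blocks treat this one as coefficient-free.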
2494 int mb_skip_context = xd->left_available ? mi_8x8[-1]->mbmi.skip_coeff : 0;
2495 mb_skip_context += mi_8x8[-mis] ? mi_8x8[-mis]->mbmi.skip_coeff : 0;
2496
2497 mbmi->skip_coeff = 1;
2498 if (output_enabled)
2499 cm->counts.mbskip[mb_skip_context][1]++;
2500 reset_skip_context(xd, MAX(bsize, BLOCK_8X8));
2501 }
2502
2503 if (output_enabled) {
2504 if (cm->tx_mode == TX_MODE_SELECT &&
2505 mbmi->sb_type >= BLOCK_8X8 &&
2506 !(is_inter_block(mbmi) &&
2507 (mbmi->skip_coeff ||
2508 vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)))) {
2509 const uint8_t context = vp9_get_pred_context_tx_size(xd);
2510 ++get_tx_counts(max_txsize_lookup[bsize],
2511 context, &cm->counts.tx)[mbmi->tx_size];
2512 } else {
2513 int x, y;
2514 TX_SIZE sz = tx_mode_to_biggest_tx_size[cm->tx_mode];
2515 assert(sizeof(tx_mode_to_biggest_tx_size) /
2516 sizeof(tx_mode_to_biggest_tx_size[0]) == TX_MODES);
2517 // The new intra coding scheme requires no change of transform size
2518 if (is_inter_block(&mi->mbmi)) {
2519 if (sz == TX_32X32 && bsize < BLOCK_32X32)
2520 sz = TX_16X16;
2521 if (sz == TX_16X16 && bsize < BLOCK_16X16)
2522 sz = TX_8X8;
2523 if (sz == TX_8X8 && bsize < BLOCK_8X8)
2524 sz = TX_4X4;
2525 } else if (bsize >= BLOCK_8X8) {
2526 sz = mbmi->tx_size;
2527 } else {
2528 sz = TX_4X4;
2529 }
2530
2531 for (y = 0; y < mi_height; y++)
2532 for (x = 0; x < mi_width; x++)
2533 if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows)
2534 mi_8x8[mis * y + x]->mbmi.tx_size = sz;
2535 }
2536 }
2537 }
