--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/media/libvpx/vp9/encoder/vp9_mbgraph.c	Wed Dec 31 06:09:35 2014 +0100
@@ -0,0 +1,432 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <limits.h>
+
+#include "vpx_mem/vpx_mem.h"
+#include "vp9/encoder/vp9_encodeintra.h"
+#include "vp9/encoder/vp9_rdopt.h"
+#include "vp9/encoder/vp9_segmentation.h"
+#include "vp9/encoder/vp9_mcomp.h"
+#include "vp9/common/vp9_blockd.h"
+#include "vp9/common/vp9_reconinter.h"
+#include "vp9/common/vp9_reconintra.h"
+#include "vp9/common/vp9_systemdependent.h"
+
+static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
+                                              int_mv *ref_mv,
+                                              int_mv *dst_mv,
+                                              int mb_row,
+                                              int mb_col) {
+  MACROBLOCK *const x = &cpi->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
+  unsigned int best_err;
+
+  const int tmp_col_min = x->mv_col_min;
+  const int tmp_col_max = x->mv_col_max;
+  const int tmp_row_min = x->mv_row_min;
+  const int tmp_row_max = x->mv_row_max;
+  int_mv ref_full;
+
+  // Further step/diamond searches as necessary
+  int step_param = cpi->sf.reduce_first_step_size +
+                   (cpi->speed < 8 ? (cpi->speed > 5 ? 1 : 0) : 2);
+  step_param = MIN(step_param, (cpi->sf.max_step_search_steps - 2));
+
+  vp9_clamp_mv_min_max(x, &ref_mv->as_mv);
+
+  // The integer search works in full-pel units; drop the sub-pel bits.
+  ref_full.as_mv.col = ref_mv->as_mv.col >> 3;
+  ref_full.as_mv.row = ref_mv->as_mv.row >> 3;
+
+  /* cpi->sf.search_method == HEX */
+  best_err = vp9_hex_search(x, &ref_full.as_mv, step_param, x->errorperbit,
+                            0, &v_fn_ptr,
+                            0, &ref_mv->as_mv, &dst_mv->as_mv);
+
+  // Try sub-pixel MC
+  // if (bestsme > error_thresh && bestsme < INT_MAX)
+  {
+    int distortion;
+    unsigned int sse;
+    best_err = cpi->find_fractional_mv_step(
+        x,
+        &dst_mv->as_mv, &ref_mv->as_mv,
+        cpi->common.allow_high_precision_mv,
+        x->errorperbit, &v_fn_ptr,
+        0, cpi->sf.subpel_iters_per_step, NULL, NULL,
+        &distortion, &sse);
+  }
+
+  vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv);
+  vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16);
+  best_err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
+                          xd->plane[0].dst.buf, xd->plane[0].dst.stride,
+                          INT_MAX);
+
+  /* restore UMV window */
+  x->mv_col_min = tmp_col_min;
+  x->mv_col_max = tmp_col_max;
+  x->mv_row_min = tmp_row_min;
+  x->mv_row_max = tmp_row_max;
+
+  return best_err;
+}
+
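+// Search for the best 16x16 motion against the reference currently wired
+// into xd->plane[0].pre[0]: measure the zero-MV SAD first, refine with
+// do_16x16_motion_iteration() seeded from the caller's MV predictor, and,
+// if that predictor is non-zero, try a second (0,0)-seeded iteration,
+// keeping whichever candidate has the lowest error.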
+static int do_16x16_motion_search(VP9_COMP *cpi, int_mv *ref_mv, int_mv *dst_mv,
+                                  int mb_row, int mb_col) {
+  MACROBLOCK *const x = &cpi->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  unsigned int err, tmp_err;
+  int_mv tmp_mv;
+
+  // Try zero MV first
+  // FIXME should really use something like near/nearest MV and/or MV prediction
+  err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
+                     xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride,
+                     INT_MAX);
+  dst_mv->as_int = 0;
+
+  // Test last reference frame using the previous best mv as the
+  // starting point (best reference) for the search
+  tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv, mb_row, mb_col);
+  if (tmp_err < err) {
+    err = tmp_err;
+    dst_mv->as_int = tmp_mv.as_int;
+  }
+
+  // If the current best reference mv is not centered on 0,0 then do a 0,0
+  // based search as well.
+  if (ref_mv->as_int) {
+    unsigned int tmp_err;
+    int_mv zero_ref_mv, tmp_mv;
+
+    zero_ref_mv.as_int = 0;
+    tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv,
+                                        mb_row, mb_col);
+    if (tmp_err < err) {
+      dst_mv->as_int = tmp_mv.as_int;
+      err = tmp_err;
+    }
+  }
+
+  return err;
+}
+
+// Zero-MV-only error for the ARF: no search, just the SAD of the co-located
+// 16x16 block.
+static int do_16x16_zerozero_search(VP9_COMP *cpi, int_mv *dst_mv) {
+  MACROBLOCK *const x = &cpi->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  unsigned int err;
+
+  // Try zero MV first
+  // FIXME should really use something like near/nearest MV and/or MV prediction
+  err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
+                     xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride,
+                     INT_MAX);
+
+  dst_mv->as_int = 0;
+
+  return err;
+}
+
+static int find_best_16x16_intra(VP9_COMP *cpi,
+                                 int mb_y_offset,
+                                 MB_PREDICTION_MODE *pbest_mode) {
+  MACROBLOCK *const x = &cpi->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_PREDICTION_MODE best_mode = -1, mode;
+  unsigned int best_err = INT_MAX;
+
+  // calculate SAD for each intra prediction mode;
+  // we're intentionally not doing 4x4, we just want a rough estimate
+  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
+    unsigned int err;
+
+    xd->mi_8x8[0]->mbmi.mode = mode;
+    vp9_predict_intra_block(xd, 0, 2, TX_16X16, mode,
+                            x->plane[0].src.buf, x->plane[0].src.stride,
+                            xd->plane[0].dst.buf, xd->plane[0].dst.stride);
+    err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
+                       xd->plane[0].dst.buf, xd->plane[0].dst.stride, best_err);
+
+    // find best
+    if (err < best_err) {
+      best_err = err;
+      best_mode = mode;
+    }
+  }
+
+  if (pbest_mode)
+    *pbest_mode = best_mode;
+
+  return best_err;
+}
+
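+// Fill one MBGRAPH_MB_STATS entry: the best 16x16 intra SAD, the
+// motion-search error against the golden frame (seeded with the
+// neighbouring block's best MV), and the zero-MV error against the ARF.
+// References that are absent get an INT_MAX error and a zero MV.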
+static void update_mbgraph_mb_stats(VP9_COMP *cpi,
+                                    MBGRAPH_MB_STATS *stats,
+                                    YV12_BUFFER_CONFIG *buf,
+                                    int mb_y_offset,
+                                    YV12_BUFFER_CONFIG *golden_ref,
+                                    int_mv *prev_golden_ref_mv,
+                                    int gld_y_offset,
+                                    YV12_BUFFER_CONFIG *alt_ref,
+                                    int_mv *prev_alt_ref_mv,
+                                    int arf_y_offset,
+                                    int mb_row,
+                                    int mb_col) {
+  MACROBLOCK *const x = &cpi->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  int intra_error;
+  VP9_COMMON *cm = &cpi->common;
+
+  // FIXME in practice we're completely ignoring chroma here
+  x->plane[0].src.buf = buf->y_buffer + mb_y_offset;
+  x->plane[0].src.stride = buf->y_stride;
+
+  xd->plane[0].dst.buf = get_frame_new_buffer(cm)->y_buffer + mb_y_offset;
+  xd->plane[0].dst.stride = get_frame_new_buffer(cm)->y_stride;
+
+  // do intra 16x16 prediction
+  intra_error = find_best_16x16_intra(cpi, mb_y_offset,
+                                      &stats->ref[INTRA_FRAME].m.mode);
+  if (intra_error <= 0)
+    intra_error = 1;
+  stats->ref[INTRA_FRAME].err = intra_error;
+
+  // Golden frame MV search, if it exists and is different from the last frame
+  if (golden_ref) {
+    int g_motion_error;
+    xd->plane[0].pre[0].buf = golden_ref->y_buffer + mb_y_offset;
+    xd->plane[0].pre[0].stride = golden_ref->y_stride;
+    g_motion_error = do_16x16_motion_search(cpi,
+                                            prev_golden_ref_mv,
+                                            &stats->ref[GOLDEN_FRAME].m.mv,
+                                            mb_row, mb_col);
+    stats->ref[GOLDEN_FRAME].err = g_motion_error;
+  } else {
+    stats->ref[GOLDEN_FRAME].err = INT_MAX;
+    stats->ref[GOLDEN_FRAME].m.mv.as_int = 0;
+  }
+
+  // Do an Alt-ref frame MV search, if it exists and is different from the
+  // last/golden frame.
+  if (alt_ref) {
+    int a_motion_error;
+    xd->plane[0].pre[0].buf = alt_ref->y_buffer + mb_y_offset;
+    xd->plane[0].pre[0].stride = alt_ref->y_stride;
+    a_motion_error = do_16x16_zerozero_search(cpi,
+                                              &stats->ref[ALTREF_FRAME].m.mv);
+
+    stats->ref[ALTREF_FRAME].err = a_motion_error;
+  } else {
+    stats->ref[ALTREF_FRAME].err = INT_MAX;
+    stats->ref[ALTREF_FRAME].m.mv.as_int = 0;
+  }
+}
+
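+// Walk every macroblock of one lookahead frame and fill stats->mb_stats.
+// The best MV of the previous block in the row (or, at column 0, of the
+// first block in the row above) seeds the next search, and the MV limits
+// are slid along with the block position so searches never leave the
+// UMV border.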
+static void update_mbgraph_frame_stats(VP9_COMP *cpi,
+                                       MBGRAPH_FRAME_STATS *stats,
+                                       YV12_BUFFER_CONFIG *buf,
+                                       YV12_BUFFER_CONFIG *golden_ref,
+                                       YV12_BUFFER_CONFIG *alt_ref) {
+  MACROBLOCK *const x = &cpi->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  VP9_COMMON *const cm = &cpi->common;
+
+  int mb_col, mb_row, offset = 0;
+  int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0;
+  int_mv arf_top_mv, gld_top_mv;
+  MODE_INFO mi_local = { { 0 } };
+
+  // Set up limit values for motion vectors to prevent them extending outside
+  // the UMV borders.
+  arf_top_mv.as_int = 0;
+  gld_top_mv.as_int = 0;
+  x->mv_row_min = -BORDER_MV_PIXELS_B16;
+  x->mv_row_max = (cm->mb_rows - 1) * 8 + BORDER_MV_PIXELS_B16;
+  xd->up_available = 0;
+  xd->plane[0].dst.stride = buf->y_stride;
+  xd->plane[0].pre[0].stride = buf->y_stride;
+  xd->plane[1].dst.stride = buf->uv_stride;
+  xd->mi_8x8[0] = &mi_local;
+  mi_local.mbmi.sb_type = BLOCK_16X16;
+  mi_local.mbmi.ref_frame[0] = LAST_FRAME;
+  mi_local.mbmi.ref_frame[1] = NONE;
+
+  for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
+    int_mv arf_left_mv, gld_left_mv;
+    int mb_y_in_offset = mb_y_offset;
+    int arf_y_in_offset = arf_y_offset;
+    int gld_y_in_offset = gld_y_offset;
+
+    // Set up limit values for motion vectors to prevent them extending outside
+    // the UMV borders.
+    arf_left_mv.as_int = arf_top_mv.as_int;
+    gld_left_mv.as_int = gld_top_mv.as_int;
+    x->mv_col_min = -BORDER_MV_PIXELS_B16;
+    x->mv_col_max = (cm->mb_cols - 1) * 8 + BORDER_MV_PIXELS_B16;
+    xd->left_available = 0;
+
+    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
+      MBGRAPH_MB_STATS *mb_stats = &stats->mb_stats[offset + mb_col];
+
+      update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset,
+                              golden_ref, &gld_left_mv, gld_y_in_offset,
+                              alt_ref, &arf_left_mv, arf_y_in_offset,
+                              mb_row, mb_col);
+      arf_left_mv.as_int = mb_stats->ref[ALTREF_FRAME].m.mv.as_int;
+      gld_left_mv.as_int = mb_stats->ref[GOLDEN_FRAME].m.mv.as_int;
+      if (mb_col == 0) {
+        arf_top_mv.as_int = arf_left_mv.as_int;
+        gld_top_mv.as_int = gld_left_mv.as_int;
+      }
+      xd->left_available = 1;
+      mb_y_in_offset += 16;
+      gld_y_in_offset += 16;
+      arf_y_in_offset += 16;
+      x->mv_col_min -= 16;
+      x->mv_col_max -= 16;
+    }
+    xd->up_available = 1;
+    mb_y_offset += buf->y_stride * 16;
+    gld_y_offset += golden_ref->y_stride * 16;
+    if (alt_ref)
+      arf_y_offset += alt_ref->y_stride * 16;
+    x->mv_row_min -= 16;
+    x->mv_row_max -= 16;
+    offset += cm->mb_cols;
+  }
+}
+
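+// Split macroblocks into two segments using the accumulated mbgraph stats:
+// segment 1 holds blocks that every analysed frame could predict from the
+// ARF with a (0,0) MV at reasonable cost; segment 0 holds the rest.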
+static void separate_arf_mbs(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  int mb_col, mb_row, offset, i;
+  int mi_row, mi_col;
+  int ncnt[4] = { 0 };
+  int n_frames = cpi->mbgraph_n_frames;
+
+  int *arf_not_zz;
+
+  CHECK_MEM_ERROR(cm, arf_not_zz,
+                  vpx_calloc(cm->mb_rows * cm->mb_cols * sizeof(*arf_not_zz),
+                             1));
+
+  // We are not interested in results beyond the alt ref itself.
+  if (n_frames > cpi->frames_till_gf_update_due)
+    n_frames = cpi->frames_till_gf_update_due;
+
+  // defer cost to reference frames
+  for (i = n_frames - 1; i >= 0; i--) {
+    MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i];
+
+    for (offset = 0, mb_row = 0; mb_row < cm->mb_rows;
+         offset += cm->mb_cols, mb_row++) {
+      for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
+        MBGRAPH_MB_STATS *mb_stats = &frame_stats->mb_stats[offset + mb_col];
+
+        int altref_err = mb_stats->ref[ALTREF_FRAME].err;
+        int intra_err = mb_stats->ref[INTRA_FRAME].err;
+        int golden_err = mb_stats->ref[GOLDEN_FRAME].err;
+
+        // Test for altref vs intra and gf and that its mv was 0,0.
+        if (altref_err > 1000 ||
+            altref_err > intra_err ||
+            altref_err > golden_err) {
+          arf_not_zz[offset + mb_col]++;
+        }
+      }
+    }
+  }
+
+  // arf_not_zz is indexed by MB, but this loop is indexed by MI to avoid
+  // out-of-bounds accesses into segmentation_map
+  for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
+    for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
+      // If any of the blocks in the sequence failed then the MB
+      // goes in segment 0
+      if (arf_not_zz[mi_row / 2 * cm->mb_cols + mi_col / 2]) {
+        ncnt[0]++;
+        cpi->segmentation_map[mi_row * cm->mi_cols + mi_col] = 0;
+      } else {
+        cpi->segmentation_map[mi_row * cm->mi_cols + mi_col] = 1;
+        ncnt[1]++;
+      }
+    }
+  }
+
+  // Only bother with segmentation if over 10% of the MBs are in the static
+  // segment
+  // if ( ncnt[1] && (ncnt[0] / ncnt[1] < 10) )
+  if (1) {
+    // Note % of blocks that are marked as static
+    if (cm->MBs)
+      cpi->static_mb_pct = (ncnt[1] * 100) / (cm->mi_rows * cm->mi_cols);
+
+    // This error case should not be reachable as this function should
+    // never be called with the common data structure uninitialized.
+    else
+      cpi->static_mb_pct = 0;
+
+    cpi->seg0_cnt = ncnt[0];
+    vp9_enable_segmentation((VP9_PTR)cpi);
+  } else {
+    cpi->static_mb_pct = 0;
+    vp9_disable_segmentation((VP9_PTR)cpi);
+  }
+
+  // Free locally allocated storage
+  vpx_free(arf_not_zz);
+}
+
+void vp9_update_mbgraph_stats(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  int i, n_frames = vp9_lookahead_depth(cpi->lookahead);
+  YV12_BUFFER_CONFIG *golden_ref =
+      &cm->yv12_fb[cm->ref_frame_map[cpi->gld_fb_idx]];
+
+  // we need to look ahead beyond where the ARF transitions into
+  // being a GF - so exit if we don't look ahead beyond that
+  if (n_frames <= cpi->frames_till_gf_update_due)
+    return;
+  if (n_frames > (int)cpi->frames_till_alt_ref_frame)
+    n_frames = cpi->frames_till_alt_ref_frame;
+  if (n_frames > MAX_LAG_BUFFERS)
+    n_frames = MAX_LAG_BUFFERS;
+
+  cpi->mbgraph_n_frames = n_frames;
+  for (i = 0; i < n_frames; i++) {
+    MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i];
+    vpx_memset(frame_stats->mb_stats, 0,
+               cm->mb_rows * cm->mb_cols *
+               sizeof(*cpi->mbgraph_stats[i].mb_stats));
+  }
+
+  // do motion search to find contribution of each reference to data
+  // later on in this GF group
+  // FIXME really, the GF/last MC search should be done forward, and
+  // the ARF MC search backwards, to get optimal results for MV caching
+  for (i = 0; i < n_frames; i++) {
+    MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i];
+    struct lookahead_entry *q_cur = vp9_lookahead_peek(cpi->lookahead, i);
+
+    assert(q_cur != NULL);
+
+    update_mbgraph_frame_stats(cpi, frame_stats, &q_cur->img,
+                               golden_ref, cpi->Source);
+  }
+
+  vp9_clear_system_state();  // __asm emms;
+
+  separate_arf_mbs(cpi);
+}
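
Note: the segment split applied by separate_arf_mbs() above can be exercised in isolation. The following standalone C sketch (not part of the patch; the toy error values, N_FRAMES, and N_MBS are illustrative) applies the same per-macroblock test: a block stays in the static segment only if, in every analysed frame, predicting it from the ARF with a (0,0) MV costs at most 1000 and is never beaten by the intra or golden-frame predictor.

#include <stdio.h>

#define N_FRAMES 3
#define N_MBS    4

int main(void) {
  /* Hypothetical per-frame, per-MB errors: altref / intra / golden. */
  static const int altref_err[N_FRAMES][N_MBS] = {
    { 200, 1500,  300,  900 },
    { 250,  400,  500,  800 },
    { 180,  350, 2500,  700 },
  };
  static const int intra_err[N_FRAMES][N_MBS] = {
    { 900,  800,  600, 1000 },
    { 900,  800,  600, 1000 },
    { 900,  800,  600, 1000 },
  };
  static const int golden_err[N_FRAMES][N_MBS] = {
    { 400,  700,  550,  950 },
    { 400,  700,  550,  950 },
    { 400,  700,  550,  950 },
  };
  int arf_not_zz[N_MBS] = { 0 };
  int f, mb, n_static = 0;

  for (f = 0; f < N_FRAMES; f++) {
    for (mb = 0; mb < N_MBS; mb++) {
      /* Same test as separate_arf_mbs(): flag the MB if the zero-MV ARF
       * prediction is expensive or beaten by another predictor. */
      if (altref_err[f][mb] > 1000 ||
          altref_err[f][mb] > intra_err[f][mb] ||
          altref_err[f][mb] > golden_err[f][mb])
        arf_not_zz[mb]++;
    }
  }

  /* A single failure in any frame demotes the MB to segment 0. */
  for (mb = 0; mb < N_MBS; mb++) {
    printf("MB %d -> segment %d\n", mb, arf_not_zz[mb] ? 0 : 1);
    if (!arf_not_zz[mb])
      n_static++;
  }
  printf("static_mb_pct = %d%%\n", n_static * 100 / N_MBS);
  return 0;
}

With these toy inputs, MB 1 and MB 2 are flagged (an altref error above 1000 in at least one frame) and land in segment 0, while MB 0 and MB 3 stay static, giving static_mb_pct = 50%.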