--- /dev/null    Thu Jan 01 00:00:00 1970 +0000
+++ b/media/libvpx/vp8/decoder/threading.c    Wed Dec 31 06:09:35 2014 +0100
@@ -0,0 +1,910 @@
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include "vpx_config.h"
#include "vp8_rtcd.h"
#if !defined(WIN32) && CONFIG_OS_SUPPORT == 1
# include <unistd.h>
#endif
#include "onyxd_int.h"
#include "vpx_mem/vpx_mem.h"
#include "vp8/common/threading.h"

#include "vp8/common/loopfilter.h"
#include "vp8/common/extend.h"
#include "vpx_ports/vpx_timer.h"
#include "detokenize.h"
#include "vp8/common/reconintra4x4.h"
#include "vp8/common/reconinter.h"
#include "vp8/common/setupintrarecon.h"
#if CONFIG_ERROR_CONCEALMENT
#include "error_concealment.h"
#endif

#define CALLOC_ARRAY(p, n) CHECK_MEM_ERROR((p), vpx_calloc(sizeof(*(p)), (n)))
#define CALLOC_ARRAY_ALIGNED(p, n, algn) do {                      \
  CHECK_MEM_ERROR((p), vpx_memalign((algn), sizeof(*(p)) * (n)));  \
  memset((p), 0, (n) * sizeof(*(p)));                              \
} while (0)


void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);

static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
{
    VP8_COMMON *const pc = &pbi->common;
    int i;

    for (i = 0; i < count; i++)
    {
        MACROBLOCKD *mbd = &mbrd[i].mbd;
        mbd->subpixel_predict      = xd->subpixel_predict;
        mbd->subpixel_predict8x4   = xd->subpixel_predict8x4;
        mbd->subpixel_predict8x8   = xd->subpixel_predict8x8;
        mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;

        mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1);
        mbd->mode_info_stride  = pc->mode_info_stride;

        mbd->frame_type = pc->frame_type;
        mbd->pre = xd->pre;
        mbd->dst = xd->dst;

        mbd->segmentation_enabled  = xd->segmentation_enabled;
        mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
        vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));

        /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/
        vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
        /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/
        vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
        /*unsigned char mode_ref_lf_delta_enabled;
        unsigned char mode_ref_lf_delta_update;*/
        mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled;
        mbd->mode_ref_lf_delta_update  = xd->mode_ref_lf_delta_update;

        mbd->current_bc = &pbi->mbc[0];

        vpx_memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
        vpx_memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
        vpx_memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
        vpx_memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));

        mbd->fullpixel_mask = 0xffffffff;

        if (pc->full_pixel)
            mbd->fullpixel_mask = 0xfffffff8;

    }

    for (i = 0; i < pc->mb_rows; i++)
        pbi->mt_current_mb_col[i] = -1;
}

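/* Decode and reconstruct one macroblock: read its coefficients from the
 * current token partition, build the intra or inter prediction, then apply
 * dequantization and the inverse transforms.  The loop filter itself is run
 * by the caller (mt_decode_mb_rows).
 */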
static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
                                 unsigned int mb_idx)
{
    MB_PREDICTION_MODE mode;
    int i;
#if CONFIG_ERROR_CONCEALMENT
    int corruption_detected = 0;
#endif

    if (xd->mode_info_context->mbmi.mb_skip_coeff)
    {
        vp8_reset_mb_tokens_context(xd);
    }
    else if (!vp8dx_bool_error(xd->current_bc))
    {
        int eobtotal;
        eobtotal = vp8_decode_mb_tokens(pbi, xd);

        /* Special case:  Force the loopfilter to skip when eobtotal is zero */
        xd->mode_info_context->mbmi.mb_skip_coeff = (eobtotal == 0);
    }

    mode = xd->mode_info_context->mbmi.mode;

    if (xd->segmentation_enabled)
        vp8_mb_init_dequantizer(pbi, xd);


#if CONFIG_ERROR_CONCEALMENT

    if (pbi->ec_active)
    {
        int throw_residual;
        /* When we have independent partitions we can apply residual even
         * though other partitions within the frame are corrupt.
         */
        throw_residual = (!pbi->independent_partitions &&
                          pbi->frame_corrupt_residual);
        throw_residual = (throw_residual || vp8dx_bool_error(xd->current_bc));

        if ((mb_idx >= pbi->mvs_corrupt_from_mb || throw_residual))
        {
            /* MB with corrupt residuals or corrupt mode/motion vectors.
             * Better to use the predictor as reconstruction.
             */
            pbi->frame_corrupt_residual = 1;
            vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
            vp8_conceal_corrupt_mb(xd);


            corruption_detected = 1;

            /* force idct to be skipped for B_PRED and use the
             * prediction only for reconstruction
             */
            vpx_memset(xd->eobs, 0, 25);
        }
    }
#endif

    /* do prediction */
    if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    {
        vp8_build_intra_predictors_mbuv_s(xd,
                                          xd->recon_above[1],
                                          xd->recon_above[2],
                                          xd->recon_left[1],
                                          xd->recon_left[2],
                                          xd->recon_left_stride[1],
                                          xd->dst.u_buffer, xd->dst.v_buffer,
                                          xd->dst.uv_stride);

        if (mode != B_PRED)
        {
            vp8_build_intra_predictors_mby_s(xd,
                                             xd->recon_above[0],
                                             xd->recon_left[0],
                                             xd->recon_left_stride[0],
                                             xd->dst.y_buffer,
                                             xd->dst.y_stride);
        }
        else
        {
            short *DQC = xd->dequant_y1;
            int dst_stride = xd->dst.y_stride;

            /* clear out residual eob info */
            if (xd->mode_info_context->mbmi.mb_skip_coeff)
                vpx_memset(xd->eobs, 0, 25);

            intra_prediction_down_copy(xd, xd->recon_above[0] + 16);

            for (i = 0; i < 16; i++)
            {
                BLOCKD *b = &xd->block[i];
                unsigned char *dst = xd->dst.y_buffer + b->offset;
                B_PREDICTION_MODE b_mode =
                    xd->mode_info_context->bmi[i].as_mode;
                unsigned char *Above;
                unsigned char *yleft;
                int left_stride;
                unsigned char top_left;

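                /* When the loop filter is enabled, neighbouring pixels on the
                 * macroblock's outer edges must be read from the saved,
                 * unfiltered copies in xd->recon_above[]/xd->recon_left[]
                 * rather than from the frame buffer, where those pixels may
                 * already have been loop filtered.
                 */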
                /*Caution: For some b_mode, it needs 8 pixels (4 above + 4 above-right).*/
                if (i < 4 && pbi->common.filter_level)
                    Above = xd->recon_above[0] + b->offset;
                else
                    Above = dst - dst_stride;

                if (i % 4 == 0 && pbi->common.filter_level)
                {
                    yleft = xd->recon_left[0] + i;
                    left_stride = 1;
                }
                else
                {
                    yleft = dst - 1;
                    left_stride = dst_stride;
                }

                if ((i == 4 || i == 8 || i == 12) && pbi->common.filter_level)
                    top_left = *(xd->recon_left[0] + i - 1);
                else
                    top_left = Above[-1];

                vp8_intra4x4_predict(Above, yleft, left_stride,
                                     b_mode, dst, dst_stride, top_left);

                if (xd->eobs[i])
                {
                    if (xd->eobs[i] > 1)
                    {
                        vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride);
                    }
                    else
                    {
                        vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0],
                                             dst, dst_stride, dst, dst_stride);
                        vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
                    }
                }
            }
        }
    }
    else
    {
        vp8_build_inter_predictors_mb(xd);
    }


#if CONFIG_ERROR_CONCEALMENT
    if (corruption_detected)
    {
        return;
    }
#endif

    if (!xd->mode_info_context->mbmi.mb_skip_coeff)
    {
        /* dequantization and idct */
        if (mode != B_PRED)
        {
            short *DQC = xd->dequant_y1;

            if (mode != SPLITMV)
            {
                BLOCKD *b = &xd->block[24];

                /* do 2nd order transform on the dc block */
                if (xd->eobs[24] > 1)
                {
                    vp8_dequantize_b(b, xd->dequant_y2);

                    vp8_short_inv_walsh4x4(&b->dqcoeff[0],
                                           xd->qcoeff);
                    vpx_memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0]));
                }
                else
                {
                    b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0];
                    vp8_short_inv_walsh4x4_1(&b->dqcoeff[0],
                                             xd->qcoeff);
                    vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
                }

                /* override the dc dequant constant in order to preserve the
                 * dc components
                 */
                DQC = xd->dequant_y1_dc;
            }

            vp8_dequant_idct_add_y_block
                            (xd->qcoeff, DQC,
                             xd->dst.y_buffer,
                             xd->dst.y_stride, xd->eobs);
        }

        vp8_dequant_idct_add_uv_block
                        (xd->qcoeff + 16 * 16, xd->dequant_uv,
                         xd->dst.u_buffer, xd->dst.v_buffer,
                         xd->dst.uv_stride, xd->eobs + 16);
    }
}

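/* Decode the macroblock rows assigned to this thread.  Rows are interleaved
 * across threads: a thread that starts at row N decodes rows N,
 * N + (decoding_thread_count + 1), and so on.  Progress within a row is
 * published through pbi->mt_current_mb_col[] so that the thread working on
 * the row below can stay at least sync_range columns behind the row above.
 */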
static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row)
{
    volatile const int *last_row_current_mb_col;
    volatile int *current_mb_col;
    int mb_row;
    VP8_COMMON *pc = &pbi->common;
    const int nsync = pbi->sync_range;
    const int first_row_no_sync_above = pc->mb_cols + nsync;
    int num_part = 1 << pbi->common.multi_token_partition;
    int last_mb_row = start_mb_row;

    YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];
    YV12_BUFFER_CONFIG *yv12_fb_lst = pbi->dec_fb_ref[LAST_FRAME];

    int recon_y_stride = yv12_fb_new->y_stride;
    int recon_uv_stride = yv12_fb_new->uv_stride;

    unsigned char *ref_buffer[MAX_REF_FRAMES][3];
    unsigned char *dst_buffer[3];
    int i;
    int ref_fb_corrupted[MAX_REF_FRAMES];

    ref_fb_corrupted[INTRA_FRAME] = 0;

    for (i = 1; i < MAX_REF_FRAMES; i++)
    {
        YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i];

        ref_buffer[i][0] = this_fb->y_buffer;
        ref_buffer[i][1] = this_fb->u_buffer;
        ref_buffer[i][2] = this_fb->v_buffer;

        ref_fb_corrupted[i] = this_fb->corrupted;
    }

    dst_buffer[0] = yv12_fb_new->y_buffer;
    dst_buffer[1] = yv12_fb_new->u_buffer;
    dst_buffer[2] = yv12_fb_new->v_buffer;

    xd->up_available = (start_mb_row != 0);

    for (mb_row = start_mb_row; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
    {
        int recon_yoffset, recon_uvoffset;
        int mb_col;
        int filter_level;
        loop_filter_info_n *lfi_n = &pc->lf_info;

        /* save last row processed by this thread */
        last_mb_row = mb_row;
        /* select bool coder for current partition */
        xd->current_bc = &pbi->mbc[mb_row % num_part];

        if (mb_row > 0)
            last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row - 1];
        else
            last_row_current_mb_col = &first_row_no_sync_above;

        current_mb_col = &pbi->mt_current_mb_col[mb_row];

        recon_yoffset = mb_row * recon_y_stride * 16;
        recon_uvoffset = mb_row * recon_uv_stride * 8;

        /* reset contexts */
        xd->above_context = pc->above_context;
        vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));

        xd->left_available = 0;

        xd->mb_to_top_edge = -((mb_row * 16) << 3);
        xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;

        if (pbi->common.filter_level)
        {
            xd->recon_above[0] = pbi->mt_yabove_row[mb_row] + 0*16 + 32;
            xd->recon_above[1] = pbi->mt_uabove_row[mb_row] + 0*8 + 16;
            xd->recon_above[2] = pbi->mt_vabove_row[mb_row] + 0*8 + 16;

            xd->recon_left[0] = pbi->mt_yleft_col[mb_row];
            xd->recon_left[1] = pbi->mt_uleft_col[mb_row];
            xd->recon_left[2] = pbi->mt_vleft_col[mb_row];

            /* TODO: move to outside row loop */
            xd->recon_left_stride[0] = 1;
            xd->recon_left_stride[1] = 1;
        }
        else
        {
            xd->recon_above[0] = dst_buffer[0] + recon_yoffset;
            xd->recon_above[1] = dst_buffer[1] + recon_uvoffset;
            xd->recon_above[2] = dst_buffer[2] + recon_uvoffset;

            xd->recon_left[0] = xd->recon_above[0] - 1;
            xd->recon_left[1] = xd->recon_above[1] - 1;
            xd->recon_left[2] = xd->recon_above[2] - 1;

            xd->recon_above[0] -= xd->dst.y_stride;
            xd->recon_above[1] -= xd->dst.uv_stride;
            xd->recon_above[2] -= xd->dst.uv_stride;

            /* TODO: move to outside row loop */
            xd->recon_left_stride[0] = xd->dst.y_stride;
            xd->recon_left_stride[1] = xd->dst.uv_stride;

            setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1],
                                   xd->recon_left[2], xd->dst.y_stride,
                                   xd->dst.uv_stride);
        }

        for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
        {
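            /* Publish progress for this row: all columns to the left of
             * mb_col are fully reconstructed (and, if enabled, loop
             * filtered).  On every sync_range-th column, wait until the
             * thread on the row above is at least sync_range macroblocks
             * ahead, so that its output is available for intra prediction
             * and the loop filter.
             */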
            *current_mb_col = mb_col - 1;

            if ((mb_col & (nsync - 1)) == 0)
            {
                while (mb_col > (*last_row_current_mb_col - nsync))
                {
                    x86_pause_hint();
                    thread_sleep(0);
                }
            }

            /* Distance of MB to the various image edges.
             * These are specified to 8th pel as they are always
             * compared to values that are in 1/8th pel units.
             */
            xd->mb_to_left_edge = -((mb_col * 16) << 3);
            xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;

            #if CONFIG_ERROR_CONCEALMENT
            {
                int corrupt_residual =
                            (!pbi->independent_partitions &&
                            pbi->frame_corrupt_residual) ||
                            vp8dx_bool_error(xd->current_bc);
                if (pbi->ec_active &&
                    (xd->mode_info_context->mbmi.ref_frame ==
                                                 INTRA_FRAME) &&
                    corrupt_residual)
                {
                    /* We have an intra block with corrupt
                     * coefficients, better to conceal with an inter
                     * block.
                     * Interpolate MVs from neighboring MBs
                     *
                     * Note that for the first mb with corrupt
                     * residual in a frame, we might not discover
                     * that before decoding the residual. That
                     * happens after this check, and therefore no
                     * inter concealment will be done.
                     */
                    vp8_interpolate_motion(xd,
                                           mb_row, mb_col,
                                           pc->mb_rows, pc->mb_cols,
                                           pc->mode_info_stride);
                }
            }
            #endif


            xd->dst.y_buffer = dst_buffer[0] + recon_yoffset;
            xd->dst.u_buffer = dst_buffer[1] + recon_uvoffset;
            xd->dst.v_buffer = dst_buffer[2] + recon_uvoffset;

            xd->pre.y_buffer = ref_buffer[xd->mode_info_context->mbmi.ref_frame][0] + recon_yoffset;
            xd->pre.u_buffer = ref_buffer[xd->mode_info_context->mbmi.ref_frame][1] + recon_uvoffset;
            xd->pre.v_buffer = ref_buffer[xd->mode_info_context->mbmi.ref_frame][2] + recon_uvoffset;

            /* propagate errors from reference frames */
            xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame];

            mt_decode_macroblock(pbi, xd, 0);

            xd->left_available = 1;

            /* check if the boolean decoder has suffered an error */
            xd->corrupted |= vp8dx_bool_error(xd->current_bc);

            xd->recon_above[0] += 16;
            xd->recon_above[1] += 8;
            xd->recon_above[2] += 8;

            if (!pbi->common.filter_level)
            {
                xd->recon_left[0] += 16;
                xd->recon_left[1] += 8;
                xd->recon_left[2] += 8;
            }

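            /* When the loop filter is enabled, save this macroblock's
             * unfiltered bottom row and right-hand column before filtering
             * it: the row below and the macroblock to the right must intra
             * predict from the pre-loop-filter reconstruction.  Then apply
             * the loop filter to this macroblock's edges.
             */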
            if (pbi->common.filter_level)
            {
                int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
                                xd->mode_info_context->mbmi.mode != SPLITMV &&
                                xd->mode_info_context->mbmi.mb_skip_coeff);

                const int mode_index = lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode];
                const int seg = xd->mode_info_context->mbmi.segment_id;
                const int ref_frame = xd->mode_info_context->mbmi.ref_frame;

                filter_level = lfi_n->lvl[seg][ref_frame][mode_index];

                if (mb_row != pc->mb_rows-1)
                {
                    /* Save decoded MB last row data for next-row decoding */
                    vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
                    vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
                    vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
                }

                /* save left_col for next MB decoding */
                if (mb_col != pc->mb_cols-1)
                {
                    MODE_INFO *next = xd->mode_info_context + 1;

                    if (next->mbmi.ref_frame == INTRA_FRAME)
                    {
                        for (i = 0; i < 16; i++)
                            pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer[i * recon_y_stride + 15];
                        for (i = 0; i < 8; i++)
                        {
                            pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer[i * recon_uv_stride + 7];
                            pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer[i * recon_uv_stride + 7];
                        }
                    }
                }

                /* loopfilter on this macroblock. */
                if (filter_level)
                {
                    if (pc->filter_type == NORMAL_LOOPFILTER)
                    {
                        loop_filter_info lfi;
                        FRAME_TYPE frame_type = pc->frame_type;
                        const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
                        lfi.mblim = lfi_n->mblim[filter_level];
                        lfi.blim = lfi_n->blim[filter_level];
                        lfi.lim = lfi_n->lim[filter_level];
                        lfi.hev_thr = lfi_n->hev_thr[hev_index];

                        if (mb_col > 0)
                            vp8_loop_filter_mbv
                            (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);

                        if (!skip_lf)
                            vp8_loop_filter_bv
                            (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);

                        /* don't apply across umv border */
                        if (mb_row > 0)
                            vp8_loop_filter_mbh
                            (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);

                        if (!skip_lf)
                            vp8_loop_filter_bh
                            (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
                    }
                    else
                    {
                        if (mb_col > 0)
                            vp8_loop_filter_simple_mbv
                            (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);

                        if (!skip_lf)
                            vp8_loop_filter_simple_bv
                            (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);

                        /* don't apply across umv border */
                        if (mb_row > 0)
                            vp8_loop_filter_simple_mbh
                            (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);

                        if (!skip_lf)
                            vp8_loop_filter_simple_bh
                            (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);
                    }
                }

            }

            recon_yoffset += 16;
            recon_uvoffset += 8;

            ++xd->mode_info_context;  /* next mb */

            xd->above_context++;
        }

        /* adjust to the next row of mbs */
        if (pbi->common.filter_level)
        {
            if (mb_row != pc->mb_rows-1)
            {
                int lasty = yv12_fb_lst->y_width + VP8BORDERINPIXELS;
                int lastuv = (yv12_fb_lst->y_width>>1) + (VP8BORDERINPIXELS>>1);

                for (i = 0; i < 4; i++)
                {
                    pbi->mt_yabove_row[mb_row + 1][lasty + i] = pbi->mt_yabove_row[mb_row + 1][lasty - 1];
                    pbi->mt_uabove_row[mb_row + 1][lastuv + i] = pbi->mt_uabove_row[mb_row + 1][lastuv - 1];
                    pbi->mt_vabove_row[mb_row + 1][lastuv + i] = pbi->mt_vabove_row[mb_row + 1][lastuv - 1];
                }
            }
        }
        else
            vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16,
                              xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);

        /* last MB of row is ready just after extension is done */
        *current_mb_col = mb_col + nsync;

        ++xd->mode_info_context;      /* skip prediction column */
        xd->up_available = 1;

        /* since we have multithread */
        xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
    }

    /* signal end of frame decoding if this thread processed the last mb_row */
    if (last_mb_row == (pc->mb_rows - 1))
        sem_post(&pbi->h_event_end_decoding);

}


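/* Worker thread entry point.  Each worker sleeps on its start semaphore,
 * decodes its share of macroblock rows when the main thread signals the
 * start of a frame, and exits once b_multithreaded_rd is cleared.
 */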
static THREAD_FUNCTION thread_decoding_proc(void *p_data)
{
    int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
    VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
    MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
    ENTROPY_CONTEXT_PLANES mb_row_left_context;

    while (1)
    {
        if (pbi->b_multithreaded_rd == 0)
            break;

        if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
        {
            if (pbi->b_multithreaded_rd == 0)
                break;
            else
            {
                MACROBLOCKD *xd = &mbrd->mbd;
                xd->left_context = &mb_row_left_context;

                mt_decode_mb_rows(pbi, xd, ithread + 1);
            }
        }
    }

    return 0;
}


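/* Create the worker threads.  The thread count is limited by pbi->max_threads,
 * the number of available cores, and the VP8 maximum of eight token
 * partitions; one of the decoding "threads" is the caller itself, so only
 * core_count - 1 workers are spawned.
 */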
void vp8_decoder_create_threads(VP8D_COMP *pbi)
{
    int core_count = 0;
    unsigned int ithread;

    pbi->b_multithreaded_rd = 0;
    pbi->allocated_decoding_thread_count = 0;

    /* limit decoding threads to the max number of token partitions */
    core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads;

    /* limit decoding threads to the available cores */
    if (core_count > pbi->common.processor_core_count)
        core_count = pbi->common.processor_core_count;

    if (core_count > 1)
    {
        pbi->b_multithreaded_rd = 1;
        pbi->decoding_thread_count = core_count - 1;

        CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count);
        CALLOC_ARRAY(pbi->h_event_start_decoding, pbi->decoding_thread_count);
        CALLOC_ARRAY_ALIGNED(pbi->mb_row_di, pbi->decoding_thread_count, 32);
        CALLOC_ARRAY(pbi->de_thread_data, pbi->decoding_thread_count);

        for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
        {
            sem_init(&pbi->h_event_start_decoding[ithread], 0, 0);

            vp8_setup_block_dptrs(&pbi->mb_row_di[ithread].mbd);

            pbi->de_thread_data[ithread].ithread = ithread;
            pbi->de_thread_data[ithread].ptr1 = (void *)pbi;
            pbi->de_thread_data[ithread].ptr2 = (void *)&pbi->mb_row_di[ithread];

            pthread_create(&pbi->h_decoding_thread[ithread], 0, thread_decoding_proc, (&pbi->de_thread_data[ithread]));
        }

        sem_init(&pbi->h_event_end_decoding, 0, 0);

        pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
    }
}


void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
{
    int i;

    if (pbi->b_multithreaded_rd)
    {
        vpx_free(pbi->mt_current_mb_col);
        pbi->mt_current_mb_col = NULL;

        /* Free above_row buffers. */
        if (pbi->mt_yabove_row)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_yabove_row[i]);
                pbi->mt_yabove_row[i] = NULL;
            }
            vpx_free(pbi->mt_yabove_row);
            pbi->mt_yabove_row = NULL;
        }

        if (pbi->mt_uabove_row)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_uabove_row[i]);
                pbi->mt_uabove_row[i] = NULL;
            }
            vpx_free(pbi->mt_uabove_row);
            pbi->mt_uabove_row = NULL;
        }

        if (pbi->mt_vabove_row)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_vabove_row[i]);
                pbi->mt_vabove_row[i] = NULL;
            }
            vpx_free(pbi->mt_vabove_row);
            pbi->mt_vabove_row = NULL;
        }

        /* Free left_col buffers. */
        if (pbi->mt_yleft_col)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_yleft_col[i]);
                pbi->mt_yleft_col[i] = NULL;
            }
            vpx_free(pbi->mt_yleft_col);
            pbi->mt_yleft_col = NULL;
        }

        if (pbi->mt_uleft_col)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_uleft_col[i]);
                pbi->mt_uleft_col[i] = NULL;
            }
            vpx_free(pbi->mt_uleft_col);
            pbi->mt_uleft_col = NULL;
        }

        if (pbi->mt_vleft_col)
        {
            for (i = 0; i < mb_rows; i++)
            {
                vpx_free(pbi->mt_vleft_col[i]);
                pbi->mt_vleft_col[i] = NULL;
            }
            vpx_free(pbi->mt_vleft_col);
            pbi->mt_vleft_col = NULL;
        }
    }
}


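/* Allocate the per-row synchronization counters and the per-row buffers that
 * hold unfiltered border pixels (one above-row and one left-column buffer per
 * macroblock row, for each plane).  sync_range, the column granularity at
 * which neighbouring rows synchronize, is also chosen here based on the
 * frame width.
 */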
void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
{
    VP8_COMMON *const pc = &pbi->common;
    int i;
    int uv_width;

    if (pbi->b_multithreaded_rd)
    {
        vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);

        /* our internal buffers are always multiples of 16 */
        if ((width & 0xf) != 0)
            width += 16 - (width & 0xf);

        if (width < 640) pbi->sync_range = 1;
        else if (width <= 1280) pbi->sync_range = 8;
        else if (width <= 2560) pbi->sync_range = 16;
        else pbi->sync_range = 32;

        uv_width = width >> 1;

        /* Allocate an int for each mb row. */
        CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows);

        /* Allocate memory for above_row buffers. */
        CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_memalign(16, sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1))));

        CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_memalign(16, sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));

        CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_memalign(16, sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));

        /* Allocate memory for left_col buffers. */
        CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));

        CALLOC_ARRAY(pbi->mt_uleft_col, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));

        CALLOC_ARRAY(pbi->mt_vleft_col, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
    }
}


void vp8_decoder_remove_threads(VP8D_COMP *pbi)
{
    /* shutdown MB Decoding thread; */
    if (pbi->b_multithreaded_rd)
    {
        int i;

        pbi->b_multithreaded_rd = 0;

        /* allow all threads to exit */
        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
        {
            sem_post(&pbi->h_event_start_decoding[i]);
            pthread_join(pbi->h_decoding_thread[i], NULL);
        }

        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
        {
            sem_destroy(&pbi->h_event_start_decoding[i]);
        }

        sem_destroy(&pbi->h_event_end_decoding);

        vpx_free(pbi->h_decoding_thread);
        pbi->h_decoding_thread = NULL;

        vpx_free(pbi->h_event_start_decoding);
        pbi->h_event_start_decoding = NULL;

        vpx_free(pbi->mb_row_di);
        pbi->mb_row_di = NULL;

        vpx_free(pbi->de_thread_data);
        pbi->de_thread_data = NULL;
    }
}

void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd)
{
    VP8_COMMON *pc = &pbi->common;
    unsigned int i;
    int j;

    int filter_level = pc->filter_level;
    YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];

    if (filter_level)
    {
        /* Set above_row buffer to 127 for decoding first MB row */
        vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, yv12_fb_new->y_width + 5);
        vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) + 5);
        vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) + 5);

        for (j = 1; j < pc->mb_rows; j++)
        {
            vpx_memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
            vpx_memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
            vpx_memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
        }

        /* Set left_col to 129 initially */
        for (j = 0; j < pc->mb_rows; j++)
        {
            vpx_memset(pbi->mt_yleft_col[j], (unsigned char)129, 16);
            vpx_memset(pbi->mt_uleft_col[j], (unsigned char)129, 8);
            vpx_memset(pbi->mt_vleft_col[j], (unsigned char)129, 8);
        }

        /* Initialize the loop filter for this frame. */
        vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level);
    }
    else
        vp8_setup_intra_recon_top_line(yv12_fb_new);

    setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);

    for (i = 0; i < pbi->decoding_thread_count; i++)
        sem_post(&pbi->h_event_start_decoding[i]);

    mt_decode_mb_rows(pbi, xd, 0);

    sem_wait(&pbi->h_event_end_decoding);   /* add back for each frame */
}