media/libvpx/vp8/decoder/threading.c

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 /*
michael@0 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
michael@0 3 *
michael@0 4 * Use of this source code is governed by a BSD-style license
michael@0 5 * that can be found in the LICENSE file in the root of the source
michael@0 6 * tree. An additional intellectual property rights grant can be found
michael@0 7 * in the file PATENTS. All contributing project authors may
michael@0 8 * be found in the AUTHORS file in the root of the source tree.
michael@0 9 */
michael@0 10
michael@0 11
michael@0 12 #include "vpx_config.h"
michael@0 13 #include "vp8_rtcd.h"
michael@0 14 #if !defined(WIN32) && CONFIG_OS_SUPPORT == 1
michael@0 15 # include <unistd.h>
michael@0 16 #endif
michael@0 17 #include "onyxd_int.h"
michael@0 18 #include "vpx_mem/vpx_mem.h"
michael@0 19 #include "vp8/common/threading.h"
michael@0 20
michael@0 21 #include "vp8/common/loopfilter.h"
michael@0 22 #include "vp8/common/extend.h"
michael@0 23 #include "vpx_ports/vpx_timer.h"
michael@0 24 #include "detokenize.h"
michael@0 25 #include "vp8/common/reconintra4x4.h"
michael@0 26 #include "vp8/common/reconinter.h"
michael@0 27 #include "vp8/common/setupintrarecon.h"
michael@0 28 #if CONFIG_ERROR_CONCEALMENT
michael@0 29 #include "error_concealment.h"
michael@0 30 #endif
michael@0 31
/* Allocate and zero an array of n elements sized by *(p)'s type, storing the
 * result in p.  CHECK_MEM_ERROR (defined elsewhere) is assumed to report the
 * failure through the decoder's error path — TODO confirm against onyxd_int.h. */
#define CALLOC_ARRAY(p, n) CHECK_MEM_ERROR((p), vpx_calloc(sizeof(*(p)), (n)))
/* Same as CALLOC_ARRAY, but the storage is aligned to 'algn' bytes.  The
 * explicit memset is needed because vpx_memalign, unlike vpx_calloc, does not
 * zero the allocation. */
#define CALLOC_ARRAY_ALIGNED(p, n, algn) do {                      \
  CHECK_MEM_ERROR((p), vpx_memalign((algn), sizeof(*(p)) * (n))); \
  memset((p), 0, (n) * sizeof(*(p)));                             \
} while (0)
michael@0 37
michael@0 38
michael@0 39 void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
michael@0 40
michael@0 41 static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
michael@0 42 {
michael@0 43 VP8_COMMON *const pc = & pbi->common;
michael@0 44 int i;
michael@0 45
michael@0 46 for (i = 0; i < count; i++)
michael@0 47 {
michael@0 48 MACROBLOCKD *mbd = &mbrd[i].mbd;
michael@0 49 mbd->subpixel_predict = xd->subpixel_predict;
michael@0 50 mbd->subpixel_predict8x4 = xd->subpixel_predict8x4;
michael@0 51 mbd->subpixel_predict8x8 = xd->subpixel_predict8x8;
michael@0 52 mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;
michael@0 53
michael@0 54 mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1);
michael@0 55 mbd->mode_info_stride = pc->mode_info_stride;
michael@0 56
michael@0 57 mbd->frame_type = pc->frame_type;
michael@0 58 mbd->pre = xd->pre;
michael@0 59 mbd->dst = xd->dst;
michael@0 60
michael@0 61 mbd->segmentation_enabled = xd->segmentation_enabled;
michael@0 62 mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
michael@0 63 vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
michael@0 64
michael@0 65 /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/
michael@0 66 vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
michael@0 67 /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/
michael@0 68 vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
michael@0 69 /*unsigned char mode_ref_lf_delta_enabled;
michael@0 70 unsigned char mode_ref_lf_delta_update;*/
michael@0 71 mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled;
michael@0 72 mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update;
michael@0 73
michael@0 74 mbd->current_bc = &pbi->mbc[0];
michael@0 75
michael@0 76 vpx_memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
michael@0 77 vpx_memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
michael@0 78 vpx_memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
michael@0 79 vpx_memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
michael@0 80
michael@0 81 mbd->fullpixel_mask = 0xffffffff;
michael@0 82
michael@0 83 if (pc->full_pixel)
michael@0 84 mbd->fullpixel_mask = 0xfffffff8;
michael@0 85
michael@0 86 }
michael@0 87
michael@0 88 for (i = 0; i < pc->mb_rows; i++)
michael@0 89 pbi->mt_current_mb_col[i] = -1;
michael@0 90 }
michael@0 91
/* Decode one macroblock: entropy-decode its residual tokens, build the intra
 * or inter prediction, then apply dequantization and the inverse transforms.
 * Runs on both the main thread and the worker threads of the multithreaded
 * decoder; mb_idx is only consulted by the error-concealment logic.
 */
static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
                                 unsigned int mb_idx)
{
    MB_PREDICTION_MODE mode;
    int i;
#if CONFIG_ERROR_CONCEALMENT
    int corruption_detected = 0;
#endif

    if (xd->mode_info_context->mbmi.mb_skip_coeff)
    {
        /* No coefficients coded for this MB; just reset the token contexts. */
        vp8_reset_mb_tokens_context(xd);
    }
    else if (!vp8dx_bool_error(xd->current_bc))
    {
        int eobtotal;
        eobtotal = vp8_decode_mb_tokens(pbi, xd);

        /* Special case:  Force the loopfilter to skip when eobtotal is zero */
        xd->mode_info_context->mbmi.mb_skip_coeff = (eobtotal==0);
    }

    mode = xd->mode_info_context->mbmi.mode;

    /* Per-segment quantizers may differ; reload the dequant tables. */
    if (xd->segmentation_enabled)
        vp8_mb_init_dequantizer(pbi, xd);


#if CONFIG_ERROR_CONCEALMENT

    if(pbi->ec_active)
    {
        int throw_residual;
        /* When we have independent partitions we can apply residual even
         * though other partitions within the frame are corrupt.
         */
        throw_residual = (!pbi->independent_partitions &&
                          pbi->frame_corrupt_residual);
        throw_residual = (throw_residual || vp8dx_bool_error(xd->current_bc));

        if ((mb_idx >= pbi->mvs_corrupt_from_mb || throw_residual))
        {
            /* MB with corrupt residuals or corrupt mode/motion vectors.
             * Better to use the predictor as reconstruction.
             */
            pbi->frame_corrupt_residual = 1;
            vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
            vp8_conceal_corrupt_mb(xd);


            corruption_detected = 1;

            /* force idct to be skipped for B_PRED and use the
             * prediction only for reconstruction
             * */
            vpx_memset(xd->eobs, 0, 25);
        }
    }
#endif

    /* do prediction */
    if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    {
        /* Chroma intra prediction; recon_above/recon_left were set up by
         * mt_decode_mb_rows to point at the appropriate neighbor pixels. */
        vp8_build_intra_predictors_mbuv_s(xd,
                                          xd->recon_above[1],
                                          xd->recon_above[2],
                                          xd->recon_left[1],
                                          xd->recon_left[2],
                                          xd->recon_left_stride[1],
                                          xd->dst.u_buffer, xd->dst.v_buffer,
                                          xd->dst.uv_stride);

        if (mode != B_PRED)
        {
            /* Whole-MB luma intra prediction. */
            vp8_build_intra_predictors_mby_s(xd,
                                             xd->recon_above[0],
                                             xd->recon_left[0],
                                             xd->recon_left_stride[0],
                                             xd->dst.y_buffer,
                                             xd->dst.y_stride);
        }
        else
        {
            /* B_PRED: predict and reconstruct each 4x4 luma subblock in turn,
             * since later subblocks predict from earlier reconstructed ones. */
            short *DQC = xd->dequant_y1;
            int dst_stride = xd->dst.y_stride;

            /* clear out residual eob info */
            if(xd->mode_info_context->mbmi.mb_skip_coeff)
                vpx_memset(xd->eobs, 0, 25);

            intra_prediction_down_copy(xd, xd->recon_above[0] + 16);

            for (i = 0; i < 16; i++)
            {
                BLOCKD *b = &xd->block[i];
                unsigned char *dst = xd->dst.y_buffer + b->offset;
                B_PREDICTION_MODE b_mode =
                    xd->mode_info_context->bmi[i].as_mode;
                unsigned char *Above;
                unsigned char *yleft;
                int left_stride;
                unsigned char top_left;

                /*Caution: For some b_mode, it needs 8 pixels (4 above + 4 above-right).*/
                /* With loopfiltering enabled, neighbor pixels come from the
                 * saved pre-filter rows/columns (mt_yabove_row/mt_yleft_col
                 * copies via recon_above/recon_left); otherwise straight from
                 * the frame buffer. */
                if (i < 4 && pbi->common.filter_level)
                    Above = xd->recon_above[0] + b->offset;
                else
                    Above = dst - dst_stride;

                if (i%4==0 && pbi->common.filter_level)
                {
                    yleft = xd->recon_left[0] + i;
                    left_stride = 1;
                }
                else
                {
                    yleft = dst - 1;
                    left_stride = dst_stride;
                }

                if ((i==4 || i==8 || i==12) && pbi->common.filter_level)
                    top_left = *(xd->recon_left[0] + i - 1);
                else
                    top_left = Above[-1];

                vp8_intra4x4_predict(Above, yleft, left_stride,
                                     b_mode, dst, dst_stride, top_left);

                /* Add the subblock's residual on top of the prediction. */
                if (xd->eobs[i] )
                {
                    if (xd->eobs[i] > 1)
                    {
                        vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride);
                    }
                    else
                    {
                        /* DC-only block: cheap path, then clear the coeff. */
                        vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0],
                                             dst, dst_stride, dst, dst_stride);
                        vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
                    }
                }
            }
        }
    }
    else
    {
        vp8_build_inter_predictors_mb(xd);
    }


#if CONFIG_ERROR_CONCEALMENT
    if (corruption_detected)
    {
        /* Concealed MB: keep the predictor as-is, skip residual add. */
        return;
    }
#endif

    if(!xd->mode_info_context->mbmi.mb_skip_coeff)
    {
        /* dequantization and idct */
        if (mode != B_PRED)
        {
            short *DQC = xd->dequant_y1;

            if (mode != SPLITMV)
            {
                BLOCKD *b = &xd->block[24];

                /* do 2nd order transform on the dc block */
                if (xd->eobs[24] > 1)
                {
                    vp8_dequantize_b(b, xd->dequant_y2);

                    vp8_short_inv_walsh4x4(&b->dqcoeff[0],
                                           xd->qcoeff);
                    vpx_memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0]));
                }
                else
                {
                    b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0];
                    vp8_short_inv_walsh4x4_1(&b->dqcoeff[0],
                                             xd->qcoeff);
                    vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
                }

                /* override the dc dequant constant in order to preserve the
                 * dc components
                 */
                DQC = xd->dequant_y1_dc;
            }

            vp8_dequant_idct_add_y_block
                            (xd->qcoeff, DQC,
                             xd->dst.y_buffer,
                             xd->dst.y_stride, xd->eobs);
        }

        /* Chroma residual: qcoeff+16*16 is where U/V coefficients start. */
        vp8_dequant_idct_add_uv_block
                        (xd->qcoeff+16*16, xd->dequant_uv,
                         xd->dst.u_buffer, xd->dst.v_buffer,
                         xd->dst.uv_stride, xd->eobs+16);
    }
}
michael@0 295
michael@0 296 static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row)
michael@0 297 {
michael@0 298 volatile const int *last_row_current_mb_col;
michael@0 299 volatile int *current_mb_col;
michael@0 300 int mb_row;
michael@0 301 VP8_COMMON *pc = &pbi->common;
michael@0 302 const int nsync = pbi->sync_range;
michael@0 303 const int first_row_no_sync_above = pc->mb_cols + nsync;
michael@0 304 int num_part = 1 << pbi->common.multi_token_partition;
michael@0 305 int last_mb_row = start_mb_row;
michael@0 306
michael@0 307 YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];
michael@0 308 YV12_BUFFER_CONFIG *yv12_fb_lst = pbi->dec_fb_ref[LAST_FRAME];
michael@0 309
michael@0 310 int recon_y_stride = yv12_fb_new->y_stride;
michael@0 311 int recon_uv_stride = yv12_fb_new->uv_stride;
michael@0 312
michael@0 313 unsigned char *ref_buffer[MAX_REF_FRAMES][3];
michael@0 314 unsigned char *dst_buffer[3];
michael@0 315 int i;
michael@0 316 int ref_fb_corrupted[MAX_REF_FRAMES];
michael@0 317
michael@0 318 ref_fb_corrupted[INTRA_FRAME] = 0;
michael@0 319
michael@0 320 for(i = 1; i < MAX_REF_FRAMES; i++)
michael@0 321 {
michael@0 322 YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i];
michael@0 323
michael@0 324 ref_buffer[i][0] = this_fb->y_buffer;
michael@0 325 ref_buffer[i][1] = this_fb->u_buffer;
michael@0 326 ref_buffer[i][2] = this_fb->v_buffer;
michael@0 327
michael@0 328 ref_fb_corrupted[i] = this_fb->corrupted;
michael@0 329 }
michael@0 330
michael@0 331 dst_buffer[0] = yv12_fb_new->y_buffer;
michael@0 332 dst_buffer[1] = yv12_fb_new->u_buffer;
michael@0 333 dst_buffer[2] = yv12_fb_new->v_buffer;
michael@0 334
michael@0 335 xd->up_available = (start_mb_row != 0);
michael@0 336
michael@0 337 for (mb_row = start_mb_row; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
michael@0 338 {
michael@0 339 int recon_yoffset, recon_uvoffset;
michael@0 340 int mb_col;
michael@0 341 int filter_level;
michael@0 342 loop_filter_info_n *lfi_n = &pc->lf_info;
michael@0 343
michael@0 344 /* save last row processed by this thread */
michael@0 345 last_mb_row = mb_row;
michael@0 346 /* select bool coder for current partition */
michael@0 347 xd->current_bc = &pbi->mbc[mb_row%num_part];
michael@0 348
michael@0 349 if (mb_row > 0)
michael@0 350 last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
michael@0 351 else
michael@0 352 last_row_current_mb_col = &first_row_no_sync_above;
michael@0 353
michael@0 354 current_mb_col = &pbi->mt_current_mb_col[mb_row];
michael@0 355
michael@0 356 recon_yoffset = mb_row * recon_y_stride * 16;
michael@0 357 recon_uvoffset = mb_row * recon_uv_stride * 8;
michael@0 358
michael@0 359 /* reset contexts */
michael@0 360 xd->above_context = pc->above_context;
michael@0 361 vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
michael@0 362
michael@0 363 xd->left_available = 0;
michael@0 364
michael@0 365 xd->mb_to_top_edge = -((mb_row * 16)) << 3;
michael@0 366 xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
michael@0 367
michael@0 368 if (pbi->common.filter_level)
michael@0 369 {
michael@0 370 xd->recon_above[0] = pbi->mt_yabove_row[mb_row] + 0*16 +32;
michael@0 371 xd->recon_above[1] = pbi->mt_uabove_row[mb_row] + 0*8 +16;
michael@0 372 xd->recon_above[2] = pbi->mt_vabove_row[mb_row] + 0*8 +16;
michael@0 373
michael@0 374 xd->recon_left[0] = pbi->mt_yleft_col[mb_row];
michael@0 375 xd->recon_left[1] = pbi->mt_uleft_col[mb_row];
michael@0 376 xd->recon_left[2] = pbi->mt_vleft_col[mb_row];
michael@0 377
michael@0 378 /* TODO: move to outside row loop */
michael@0 379 xd->recon_left_stride[0] = 1;
michael@0 380 xd->recon_left_stride[1] = 1;
michael@0 381 }
michael@0 382 else
michael@0 383 {
michael@0 384 xd->recon_above[0] = dst_buffer[0] + recon_yoffset;
michael@0 385 xd->recon_above[1] = dst_buffer[1] + recon_uvoffset;
michael@0 386 xd->recon_above[2] = dst_buffer[2] + recon_uvoffset;
michael@0 387
michael@0 388 xd->recon_left[0] = xd->recon_above[0] - 1;
michael@0 389 xd->recon_left[1] = xd->recon_above[1] - 1;
michael@0 390 xd->recon_left[2] = xd->recon_above[2] - 1;
michael@0 391
michael@0 392 xd->recon_above[0] -= xd->dst.y_stride;
michael@0 393 xd->recon_above[1] -= xd->dst.uv_stride;
michael@0 394 xd->recon_above[2] -= xd->dst.uv_stride;
michael@0 395
michael@0 396 /* TODO: move to outside row loop */
michael@0 397 xd->recon_left_stride[0] = xd->dst.y_stride;
michael@0 398 xd->recon_left_stride[1] = xd->dst.uv_stride;
michael@0 399
michael@0 400 setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1],
michael@0 401 xd->recon_left[2], xd->dst.y_stride,
michael@0 402 xd->dst.uv_stride);
michael@0 403 }
michael@0 404
michael@0 405 for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
michael@0 406 {
michael@0 407 *current_mb_col = mb_col - 1;
michael@0 408
michael@0 409 if ((mb_col & (nsync - 1)) == 0)
michael@0 410 {
michael@0 411 while (mb_col > (*last_row_current_mb_col - nsync))
michael@0 412 {
michael@0 413 x86_pause_hint();
michael@0 414 thread_sleep(0);
michael@0 415 }
michael@0 416 }
michael@0 417
michael@0 418 /* Distance of MB to the various image edges.
michael@0 419 * These are specified to 8th pel as they are always
michael@0 420 * compared to values that are in 1/8th pel units.
michael@0 421 */
michael@0 422 xd->mb_to_left_edge = -((mb_col * 16) << 3);
michael@0 423 xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
michael@0 424
michael@0 425 #if CONFIG_ERROR_CONCEALMENT
michael@0 426 {
michael@0 427 int corrupt_residual =
michael@0 428 (!pbi->independent_partitions &&
michael@0 429 pbi->frame_corrupt_residual) ||
michael@0 430 vp8dx_bool_error(xd->current_bc);
michael@0 431 if (pbi->ec_active &&
michael@0 432 (xd->mode_info_context->mbmi.ref_frame ==
michael@0 433 INTRA_FRAME) &&
michael@0 434 corrupt_residual)
michael@0 435 {
michael@0 436 /* We have an intra block with corrupt
michael@0 437 * coefficients, better to conceal with an inter
michael@0 438 * block.
michael@0 439 * Interpolate MVs from neighboring MBs
michael@0 440 *
michael@0 441 * Note that for the first mb with corrupt
michael@0 442 * residual in a frame, we might not discover
michael@0 443 * that before decoding the residual. That
michael@0 444 * happens after this check, and therefore no
michael@0 445 * inter concealment will be done.
michael@0 446 */
michael@0 447 vp8_interpolate_motion(xd,
michael@0 448 mb_row, mb_col,
michael@0 449 pc->mb_rows, pc->mb_cols,
michael@0 450 pc->mode_info_stride);
michael@0 451 }
michael@0 452 }
michael@0 453 #endif
michael@0 454
michael@0 455
michael@0 456 xd->dst.y_buffer = dst_buffer[0] + recon_yoffset;
michael@0 457 xd->dst.u_buffer = dst_buffer[1] + recon_uvoffset;
michael@0 458 xd->dst.v_buffer = dst_buffer[2] + recon_uvoffset;
michael@0 459
michael@0 460 xd->pre.y_buffer = ref_buffer[xd->mode_info_context->mbmi.ref_frame][0] + recon_yoffset;
michael@0 461 xd->pre.u_buffer = ref_buffer[xd->mode_info_context->mbmi.ref_frame][1] + recon_uvoffset;
michael@0 462 xd->pre.v_buffer = ref_buffer[xd->mode_info_context->mbmi.ref_frame][2] + recon_uvoffset;
michael@0 463
michael@0 464 /* propagate errors from reference frames */
michael@0 465 xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame];
michael@0 466
michael@0 467 mt_decode_macroblock(pbi, xd, 0);
michael@0 468
michael@0 469 xd->left_available = 1;
michael@0 470
michael@0 471 /* check if the boolean decoder has suffered an error */
michael@0 472 xd->corrupted |= vp8dx_bool_error(xd->current_bc);
michael@0 473
michael@0 474 xd->recon_above[0] += 16;
michael@0 475 xd->recon_above[1] += 8;
michael@0 476 xd->recon_above[2] += 8;
michael@0 477
michael@0 478 if (!pbi->common.filter_level)
michael@0 479 {
michael@0 480 xd->recon_left[0] += 16;
michael@0 481 xd->recon_left[1] += 8;
michael@0 482 xd->recon_left[2] += 8;
michael@0 483 }
michael@0 484
michael@0 485 if (pbi->common.filter_level)
michael@0 486 {
michael@0 487 int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
michael@0 488 xd->mode_info_context->mbmi.mode != SPLITMV &&
michael@0 489 xd->mode_info_context->mbmi.mb_skip_coeff);
michael@0 490
michael@0 491 const int mode_index = lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode];
michael@0 492 const int seg = xd->mode_info_context->mbmi.segment_id;
michael@0 493 const int ref_frame = xd->mode_info_context->mbmi.ref_frame;
michael@0 494
michael@0 495 filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
michael@0 496
michael@0 497 if( mb_row != pc->mb_rows-1 )
michael@0 498 {
michael@0 499 /* Save decoded MB last row data for next-row decoding */
michael@0 500 vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
michael@0 501 vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
michael@0 502 vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
michael@0 503 }
michael@0 504
michael@0 505 /* save left_col for next MB decoding */
michael@0 506 if(mb_col != pc->mb_cols-1)
michael@0 507 {
michael@0 508 MODE_INFO *next = xd->mode_info_context +1;
michael@0 509
michael@0 510 if (next->mbmi.ref_frame == INTRA_FRAME)
michael@0 511 {
michael@0 512 for (i = 0; i < 16; i++)
michael@0 513 pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
michael@0 514 for (i = 0; i < 8; i++)
michael@0 515 {
michael@0 516 pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7];
michael@0 517 pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7];
michael@0 518 }
michael@0 519 }
michael@0 520 }
michael@0 521
michael@0 522 /* loopfilter on this macroblock. */
michael@0 523 if (filter_level)
michael@0 524 {
michael@0 525 if(pc->filter_type == NORMAL_LOOPFILTER)
michael@0 526 {
michael@0 527 loop_filter_info lfi;
michael@0 528 FRAME_TYPE frame_type = pc->frame_type;
michael@0 529 const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
michael@0 530 lfi.mblim = lfi_n->mblim[filter_level];
michael@0 531 lfi.blim = lfi_n->blim[filter_level];
michael@0 532 lfi.lim = lfi_n->lim[filter_level];
michael@0 533 lfi.hev_thr = lfi_n->hev_thr[hev_index];
michael@0 534
michael@0 535 if (mb_col > 0)
michael@0 536 vp8_loop_filter_mbv
michael@0 537 (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
michael@0 538
michael@0 539 if (!skip_lf)
michael@0 540 vp8_loop_filter_bv
michael@0 541 (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
michael@0 542
michael@0 543 /* don't apply across umv border */
michael@0 544 if (mb_row > 0)
michael@0 545 vp8_loop_filter_mbh
michael@0 546 (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
michael@0 547
michael@0 548 if (!skip_lf)
michael@0 549 vp8_loop_filter_bh
michael@0 550 (xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi);
michael@0 551 }
michael@0 552 else
michael@0 553 {
michael@0 554 if (mb_col > 0)
michael@0 555 vp8_loop_filter_simple_mbv
michael@0 556 (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);
michael@0 557
michael@0 558 if (!skip_lf)
michael@0 559 vp8_loop_filter_simple_bv
michael@0 560 (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);
michael@0 561
michael@0 562 /* don't apply across umv border */
michael@0 563 if (mb_row > 0)
michael@0 564 vp8_loop_filter_simple_mbh
michael@0 565 (xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]);
michael@0 566
michael@0 567 if (!skip_lf)
michael@0 568 vp8_loop_filter_simple_bh
michael@0 569 (xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]);
michael@0 570 }
michael@0 571 }
michael@0 572
michael@0 573 }
michael@0 574
michael@0 575 recon_yoffset += 16;
michael@0 576 recon_uvoffset += 8;
michael@0 577
michael@0 578 ++xd->mode_info_context; /* next mb */
michael@0 579
michael@0 580 xd->above_context++;
michael@0 581 }
michael@0 582
michael@0 583 /* adjust to the next row of mbs */
michael@0 584 if (pbi->common.filter_level)
michael@0 585 {
michael@0 586 if(mb_row != pc->mb_rows-1)
michael@0 587 {
michael@0 588 int lasty = yv12_fb_lst->y_width + VP8BORDERINPIXELS;
michael@0 589 int lastuv = (yv12_fb_lst->y_width>>1) + (VP8BORDERINPIXELS>>1);
michael@0 590
michael@0 591 for (i = 0; i < 4; i++)
michael@0 592 {
michael@0 593 pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1];
michael@0 594 pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1];
michael@0 595 pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
michael@0 596 }
michael@0 597 }
michael@0 598 }
michael@0 599 else
michael@0 600 vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16,
michael@0 601 xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
michael@0 602
michael@0 603 /* last MB of row is ready just after extension is done */
michael@0 604 *current_mb_col = mb_col + nsync;
michael@0 605
michael@0 606 ++xd->mode_info_context; /* skip prediction column */
michael@0 607 xd->up_available = 1;
michael@0 608
michael@0 609 /* since we have multithread */
michael@0 610 xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
michael@0 611 }
michael@0 612
michael@0 613 /* signal end of frame decoding if this thread processed the last mb_row */
michael@0 614 if (last_mb_row == (pc->mb_rows - 1))
michael@0 615 sem_post(&pbi->h_event_end_decoding);
michael@0 616
michael@0 617 }
michael@0 618
michael@0 619
michael@0 620 static THREAD_FUNCTION thread_decoding_proc(void *p_data)
michael@0 621 {
michael@0 622 int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
michael@0 623 VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
michael@0 624 MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
michael@0 625 ENTROPY_CONTEXT_PLANES mb_row_left_context;
michael@0 626
michael@0 627 while (1)
michael@0 628 {
michael@0 629 if (pbi->b_multithreaded_rd == 0)
michael@0 630 break;
michael@0 631
michael@0 632 if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
michael@0 633 {
michael@0 634 if (pbi->b_multithreaded_rd == 0)
michael@0 635 break;
michael@0 636 else
michael@0 637 {
michael@0 638 MACROBLOCKD *xd = &mbrd->mbd;
michael@0 639 xd->left_context = &mb_row_left_context;
michael@0 640
michael@0 641 mt_decode_mb_rows(pbi, xd, ithread+1);
michael@0 642 }
michael@0 643 }
michael@0 644 }
michael@0 645
michael@0 646 return 0 ;
michael@0 647 }
michael@0 648
michael@0 649
/* Spawn the decoder's worker threads.  Creates core_count - 1 workers (the
 * calling thread is the remaining one) along with their per-thread state and
 * start semaphores.  If fewer than two usable cores, leaves the decoder in
 * single-threaded mode (b_multithreaded_rd == 0).
 *
 * NOTE(review): the return values of sem_init and pthread_create are not
 * checked; a failed thread creation would leave a zeroed handle that is
 * later passed to pthread_join — worth hardening.
 */
void vp8_decoder_create_threads(VP8D_COMP *pbi)
{
    int core_count = 0;
    unsigned int ithread;

    pbi->b_multithreaded_rd = 0;
    pbi->allocated_decoding_thread_count = 0;

    /* limit decoding threads to the max number of token partitions */
    core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads;

    /* limit decoding threads to the available cores */
    if (core_count > pbi->common.processor_core_count)
        core_count = pbi->common.processor_core_count;

    if (core_count > 1)
    {
        pbi->b_multithreaded_rd = 1;
        pbi->decoding_thread_count = core_count - 1;

        /* Per-worker handles, semaphores, row-decode state, thread args. */
        CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count);
        CALLOC_ARRAY(pbi->h_event_start_decoding, pbi->decoding_thread_count);
        CALLOC_ARRAY_ALIGNED(pbi->mb_row_di, pbi->decoding_thread_count, 32);
        CALLOC_ARRAY(pbi->de_thread_data, pbi->decoding_thread_count);

        for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
        {
            sem_init(&pbi->h_event_start_decoding[ithread], 0, 0);

            vp8_setup_block_dptrs(&pbi->mb_row_di[ithread].mbd);

            pbi->de_thread_data[ithread].ithread = ithread;
            pbi->de_thread_data[ithread].ptr1 = (void *)pbi;
            pbi->de_thread_data[ithread].ptr2 = (void *) &pbi->mb_row_di[ithread];

            pthread_create(&pbi->h_decoding_thread[ithread], 0, thread_decoding_proc, (&pbi->de_thread_data[ithread]));
        }

        /* Signaled by whichever thread decodes the last macroblock row. */
        sem_init(&pbi->h_event_end_decoding, 0, 0);

        pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
    }
}
michael@0 693
michael@0 694
michael@0 695 void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
michael@0 696 {
michael@0 697 int i;
michael@0 698
michael@0 699 if (pbi->b_multithreaded_rd)
michael@0 700 {
michael@0 701 vpx_free(pbi->mt_current_mb_col);
michael@0 702 pbi->mt_current_mb_col = NULL ;
michael@0 703
michael@0 704 /* Free above_row buffers. */
michael@0 705 if (pbi->mt_yabove_row)
michael@0 706 {
michael@0 707 for (i=0; i< mb_rows; i++)
michael@0 708 {
michael@0 709 vpx_free(pbi->mt_yabove_row[i]);
michael@0 710 pbi->mt_yabove_row[i] = NULL ;
michael@0 711 }
michael@0 712 vpx_free(pbi->mt_yabove_row);
michael@0 713 pbi->mt_yabove_row = NULL ;
michael@0 714 }
michael@0 715
michael@0 716 if (pbi->mt_uabove_row)
michael@0 717 {
michael@0 718 for (i=0; i< mb_rows; i++)
michael@0 719 {
michael@0 720 vpx_free(pbi->mt_uabove_row[i]);
michael@0 721 pbi->mt_uabove_row[i] = NULL ;
michael@0 722 }
michael@0 723 vpx_free(pbi->mt_uabove_row);
michael@0 724 pbi->mt_uabove_row = NULL ;
michael@0 725 }
michael@0 726
michael@0 727 if (pbi->mt_vabove_row)
michael@0 728 {
michael@0 729 for (i=0; i< mb_rows; i++)
michael@0 730 {
michael@0 731 vpx_free(pbi->mt_vabove_row[i]);
michael@0 732 pbi->mt_vabove_row[i] = NULL ;
michael@0 733 }
michael@0 734 vpx_free(pbi->mt_vabove_row);
michael@0 735 pbi->mt_vabove_row = NULL ;
michael@0 736 }
michael@0 737
michael@0 738 /* Free left_col buffers. */
michael@0 739 if (pbi->mt_yleft_col)
michael@0 740 {
michael@0 741 for (i=0; i< mb_rows; i++)
michael@0 742 {
michael@0 743 vpx_free(pbi->mt_yleft_col[i]);
michael@0 744 pbi->mt_yleft_col[i] = NULL ;
michael@0 745 }
michael@0 746 vpx_free(pbi->mt_yleft_col);
michael@0 747 pbi->mt_yleft_col = NULL ;
michael@0 748 }
michael@0 749
michael@0 750 if (pbi->mt_uleft_col)
michael@0 751 {
michael@0 752 for (i=0; i< mb_rows; i++)
michael@0 753 {
michael@0 754 vpx_free(pbi->mt_uleft_col[i]);
michael@0 755 pbi->mt_uleft_col[i] = NULL ;
michael@0 756 }
michael@0 757 vpx_free(pbi->mt_uleft_col);
michael@0 758 pbi->mt_uleft_col = NULL ;
michael@0 759 }
michael@0 760
michael@0 761 if (pbi->mt_vleft_col)
michael@0 762 {
michael@0 763 for (i=0; i< mb_rows; i++)
michael@0 764 {
michael@0 765 vpx_free(pbi->mt_vleft_col[i]);
michael@0 766 pbi->mt_vleft_col[i] = NULL ;
michael@0 767 }
michael@0 768 vpx_free(pbi->mt_vleft_col);
michael@0 769 pbi->mt_vleft_col = NULL ;
michael@0 770 }
michael@0 771 }
michael@0 772 }
michael@0 773
michael@0 774
/* (Re)allocate the multithreaded decoder's temporary buffers for a frame of
 * the given width: per-row sync counters, saved above-row pixel lines (with
 * border), and saved left-column pixel lines.  prev_mb_rows is the row count
 * of any previously allocated buffers, which are freed first.  Also derives
 * sync_range from the frame width. */
void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
{
    VP8_COMMON *const pc = & pbi->common;
    int i;
    int uv_width;

    if (pbi->b_multithreaded_rd)
    {
        vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);

        /* our internal buffers are always multiples of 16 */
        if ((width & 0xf) != 0)
            width += 16 - (width & 0xf);

        /* sync_range is how many columns ahead the row above must be before
         * this row proceeds.  All values are powers of two because the row
         * loop tests (mb_col & (nsync - 1)) as a mask. */
        if (width < 640) pbi->sync_range = 1;
        else if (width <= 1280) pbi->sync_range = 8;
        else if (width <= 2560) pbi->sync_range =16;
        else pbi->sync_range = 32;

        uv_width = width >>1;

        /* Allocate an int for each mb row. */
        CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows);

        /* Allocate memory for above_row buffers. */
        /* Luma rows carry a border on both sides (VP8BORDERINPIXELS << 1);
         * chroma rows are half width with a single border's worth. */
        CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1))));

        CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));

        CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_memalign(16,sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));

        /* Allocate memory for left_col buffers. */
        /* One macroblock-edge column per row: 16 luma / 8 chroma pixels. */
        CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));

        CALLOC_ARRAY(pbi->mt_uleft_col, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));

        CALLOC_ARRAY(pbi->mt_vleft_col, pc->mb_rows);
        for (i = 0; i < pc->mb_rows; i++)
            CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
    }
}
michael@0 826
michael@0 827
michael@0 828 void vp8_decoder_remove_threads(VP8D_COMP *pbi)
michael@0 829 {
michael@0 830 /* shutdown MB Decoding thread; */
michael@0 831 if (pbi->b_multithreaded_rd)
michael@0 832 {
michael@0 833 int i;
michael@0 834
michael@0 835 pbi->b_multithreaded_rd = 0;
michael@0 836
michael@0 837 /* allow all threads to exit */
michael@0 838 for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
michael@0 839 {
michael@0 840 sem_post(&pbi->h_event_start_decoding[i]);
michael@0 841 pthread_join(pbi->h_decoding_thread[i], NULL);
michael@0 842 }
michael@0 843
michael@0 844 for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
michael@0 845 {
michael@0 846 sem_destroy(&pbi->h_event_start_decoding[i]);
michael@0 847 }
michael@0 848
michael@0 849 sem_destroy(&pbi->h_event_end_decoding);
michael@0 850
michael@0 851 vpx_free(pbi->h_decoding_thread);
michael@0 852 pbi->h_decoding_thread = NULL;
michael@0 853
michael@0 854 vpx_free(pbi->h_event_start_decoding);
michael@0 855 pbi->h_event_start_decoding = NULL;
michael@0 856
michael@0 857 vpx_free(pbi->mb_row_di);
michael@0 858 pbi->mb_row_di = NULL ;
michael@0 859
michael@0 860 vpx_free(pbi->de_thread_data);
michael@0 861 pbi->de_thread_data = NULL;
michael@0 862 }
michael@0 863 }
michael@0 864
michael@0 865 void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
michael@0 866 {
michael@0 867 VP8_COMMON *pc = &pbi->common;
michael@0 868 unsigned int i;
michael@0 869 int j;
michael@0 870
michael@0 871 int filter_level = pc->filter_level;
michael@0 872 YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];
michael@0 873
michael@0 874 if (filter_level)
michael@0 875 {
michael@0 876 /* Set above_row buffer to 127 for decoding first MB row */
michael@0 877 vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, yv12_fb_new->y_width + 5);
michael@0 878 vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5);
michael@0 879 vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (yv12_fb_new->y_width>>1) +5);
michael@0 880
michael@0 881 for (j=1; j<pc->mb_rows; j++)
michael@0 882 {
michael@0 883 vpx_memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
michael@0 884 vpx_memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
michael@0 885 vpx_memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
michael@0 886 }
michael@0 887
michael@0 888 /* Set left_col to 129 initially */
michael@0 889 for (j=0; j<pc->mb_rows; j++)
michael@0 890 {
michael@0 891 vpx_memset(pbi->mt_yleft_col[j], (unsigned char)129, 16);
michael@0 892 vpx_memset(pbi->mt_uleft_col[j], (unsigned char)129, 8);
michael@0 893 vpx_memset(pbi->mt_vleft_col[j], (unsigned char)129, 8);
michael@0 894 }
michael@0 895
michael@0 896 /* Initialize the loop filter for this frame. */
michael@0 897 vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level);
michael@0 898 }
michael@0 899 else
michael@0 900 vp8_setup_intra_recon_top_line(yv12_fb_new);
michael@0 901
michael@0 902 setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
michael@0 903
michael@0 904 for (i = 0; i < pbi->decoding_thread_count; i++)
michael@0 905 sem_post(&pbi->h_event_start_decoding[i]);
michael@0 906
michael@0 907 mt_decode_mb_rows(pbi, xd, 0);
michael@0 908
michael@0 909 sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */
michael@0 910 }

mercurial