1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libvpx/vp8/encoder/ethreading.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,655 @@ 1.4 +/* 1.5 + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 1.6 + * 1.7 + * Use of this source code is governed by a BSD-style license 1.8 + * that can be found in the LICENSE file in the root of the source 1.9 + * tree. An additional intellectual property rights grant can be found 1.10 + * in the file PATENTS. All contributing project authors may 1.11 + * be found in the AUTHORS file in the root of the source tree. 1.12 + */ 1.13 + 1.14 +#include "onyx_int.h" 1.15 +#include "vp8/common/threading.h" 1.16 +#include "vp8/common/common.h" 1.17 +#include "vp8/common/extend.h" 1.18 +#include "bitstream.h" 1.19 +#include "encodeframe.h" 1.20 + 1.21 +#if CONFIG_MULTITHREAD 1.22 + 1.23 +extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip); 1.24 + 1.25 +extern void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm); 1.26 + 1.27 +static THREAD_FUNCTION thread_loopfilter(void *p_data) 1.28 +{ 1.29 + VP8_COMP *cpi = (VP8_COMP *)(((LPFTHREAD_DATA *)p_data)->ptr1); 1.30 + VP8_COMMON *cm = &cpi->common; 1.31 + 1.32 + while (1) 1.33 + { 1.34 + if (cpi->b_multi_threaded == 0) 1.35 + break; 1.36 + 1.37 + if (sem_wait(&cpi->h_event_start_lpf) == 0) 1.38 + { 1.39 + if (cpi->b_multi_threaded == 0) /* we're shutting down */ 1.40 + break; 1.41 + 1.42 + vp8_loopfilter_frame(cpi, cm); 1.43 + 1.44 + sem_post(&cpi->h_event_end_lpf); 1.45 + } 1.46 + } 1.47 + 1.48 + return 0; 1.49 +} 1.50 + 1.51 +static 1.52 +THREAD_FUNCTION thread_encoding_proc(void *p_data) 1.53 +{ 1.54 + int ithread = ((ENCODETHREAD_DATA *)p_data)->ithread; 1.55 + VP8_COMP *cpi = (VP8_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr1); 1.56 + MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2); 1.57 + ENTROPY_CONTEXT_PLANES mb_row_left_context; 1.58 + 1.59 + while (1) 1.60 + { 1.61 + if (cpi->b_multi_threaded == 0) 1.62 + break; 1.63 + 1.64 + if (sem_wait(&cpi->h_event_start_encoding[ithread]) == 0) 1.65 + { 1.66 + const int nsync = cpi->mt_sync_range; 1.67 + VP8_COMMON *cm = &cpi->common; 1.68 + int mb_row; 1.69 + MACROBLOCK *x = &mbri->mb; 1.70 + MACROBLOCKD *xd = &x->e_mbd; 1.71 + TOKENEXTRA *tp ; 1.72 +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING 1.73 + TOKENEXTRA *tp_start = cpi->tok + (1 + ithread) * (16 * 24); 1.74 + const int num_part = (1 << cm->multi_token_partition); 1.75 +#endif 1.76 + 1.77 + int *segment_counts = mbri->segment_counts; 1.78 + int *totalrate = &mbri->totalrate; 1.79 + 1.80 + if (cpi->b_multi_threaded == 0) /* we're shutting down */ 1.81 + break; 1.82 + 1.83 + for (mb_row = ithread + 1; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1)) 1.84 + { 1.85 + 1.86 + int recon_yoffset, recon_uvoffset; 1.87 + int mb_col; 1.88 + int ref_fb_idx = cm->lst_fb_idx; 1.89 + int dst_fb_idx = cm->new_fb_idx; 1.90 + int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; 1.91 + int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; 1.92 + int map_index = (mb_row * cm->mb_cols); 1.93 + volatile const int *last_row_current_mb_col; 1.94 + volatile int *current_mb_col = &cpi->mt_current_mb_col[mb_row]; 1.95 + 1.96 +#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) 1.97 + vp8_writer *w = &cpi->bc[1 + (mb_row % num_part)]; 1.98 +#else 1.99 + tp = cpi->tok + (mb_row * (cm->mb_cols * 16 * 24)); 1.100 + cpi->tplist[mb_row].start = tp; 1.101 +#endif 1.102 + 1.103 + last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1]; 1.104 + 1.105 + /* reset above block coeffs */ 1.106 + xd->above_context = cm->above_context; 1.107 + xd->left_context = &mb_row_left_context; 1.108 + 1.109 + vp8_zero(mb_row_left_context); 1.110 + 1.111 + xd->up_available = (mb_row != 0); 1.112 + recon_yoffset = (mb_row * recon_y_stride * 16); 1.113 + recon_uvoffset = (mb_row * recon_uv_stride * 8); 1.114 + 1.115 + /* Set the mb activity pointer to the start of the row. */ 1.116 + x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; 1.117 + 1.118 + /* for each macroblock col in image */ 1.119 + for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) 1.120 + { 1.121 + *current_mb_col = mb_col - 1; 1.122 + 1.123 + if ((mb_col & (nsync - 1)) == 0) 1.124 + { 1.125 + while (mb_col > (*last_row_current_mb_col - nsync)) 1.126 + { 1.127 + x86_pause_hint(); 1.128 + thread_sleep(0); 1.129 + } 1.130 + } 1.131 + 1.132 +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING 1.133 + tp = tp_start; 1.134 +#endif 1.135 + 1.136 + /* Distance of Mb to the various image edges. 1.137 + * These specified to 8th pel as they are always compared 1.138 + * to values that are in 1/8th pel units 1.139 + */ 1.140 + xd->mb_to_left_edge = -((mb_col * 16) << 3); 1.141 + xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; 1.142 + xd->mb_to_top_edge = -((mb_row * 16) << 3); 1.143 + xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; 1.144 + 1.145 + /* Set up limit values for motion vectors used to prevent 1.146 + * them extending outside the UMV borders 1.147 + */ 1.148 + x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); 1.149 + x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); 1.150 + x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); 1.151 + x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); 1.152 + 1.153 + xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; 1.154 + xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; 1.155 + xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; 1.156 + xd->left_available = (mb_col != 0); 1.157 + 1.158 + x->rddiv = cpi->RDDIV; 1.159 + x->rdmult = cpi->RDMULT; 1.160 + 1.161 + /* Copy current mb to a buffer */ 1.162 + vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); 1.163 + 1.164 + if (cpi->oxcf.tuning == VP8_TUNE_SSIM) 1.165 + vp8_activity_masking(cpi, x); 1.166 + 1.167 + /* Is segmentation enabled */ 1.168 + /* MB level adjustment to quantizer */ 1.169 + if (xd->segmentation_enabled) 1.170 + { 1.171 + /* Code to set segment id in xd->mbmi.segment_id for 1.172 + * current MB (with range checking) 1.173 + */ 1.174 + if (cpi->segmentation_map[map_index + mb_col] <= 3) 1.175 + xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index + mb_col]; 1.176 + else 1.177 + xd->mode_info_context->mbmi.segment_id = 0; 1.178 + 1.179 + vp8cx_mb_init_quantizer(cpi, x, 1); 1.180 + } 1.181 + else 1.182 + /* Set to Segment 0 by default */ 1.183 + xd->mode_info_context->mbmi.segment_id = 0; 1.184 + 1.185 + x->active_ptr = cpi->active_map + map_index + mb_col; 1.186 + 1.187 + if (cm->frame_type == KEY_FRAME) 1.188 + { 1.189 + *totalrate += vp8cx_encode_intra_macroblock(cpi, x, &tp); 1.190 +#ifdef MODE_STATS 1.191 + y_modes[xd->mbmi.mode] ++; 1.192 +#endif 1.193 + } 1.194 + else 1.195 + { 1.196 + *totalrate += vp8cx_encode_inter_macroblock(cpi, x, &tp, recon_yoffset, recon_uvoffset, mb_row, mb_col); 1.197 + 1.198 +#ifdef MODE_STATS 1.199 + inter_y_modes[xd->mbmi.mode] ++; 1.200 + 1.201 + if (xd->mbmi.mode == SPLITMV) 1.202 + { 1.203 + int b; 1.204 + 1.205 + for (b = 0; b < xd->mbmi.partition_count; b++) 1.206 + { 1.207 + inter_b_modes[x->partition->bmi[b].mode] ++; 1.208 + } 1.209 + } 1.210 + 1.211 +#endif 1.212 + 1.213 + /* Special case code for cyclic refresh 1.214 + * If cyclic update enabled then copy 1.215 + * xd->mbmi.segment_id; (which may have been updated 1.216 + * based on mode during 1.217 + * vp8cx_encode_inter_macroblock()) back into the 1.218 + * global segmentation map 1.219 + */ 1.220 + if ((cpi->current_layer == 0) && 1.221 + (cpi->cyclic_refresh_mode_enabled && 1.222 + xd->segmentation_enabled)) 1.223 + { 1.224 + const MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; 1.225 + cpi->segmentation_map[map_index + mb_col] = mbmi->segment_id; 1.226 + 1.227 + /* If the block has been refreshed mark it as clean 1.228 + * (the magnitude of the -ve influences how long it 1.229 + * will be before we consider another refresh): 1.230 + * Else if it was coded (last frame 0,0) and has 1.231 + * not already been refreshed then mark it as a 1.232 + * candidate for cleanup next time (marked 0) else 1.233 + * mark it as dirty (1). 1.234 + */ 1.235 + if (mbmi->segment_id) 1.236 + cpi->cyclic_refresh_map[map_index + mb_col] = -1; 1.237 + else if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME)) 1.238 + { 1.239 + if (cpi->cyclic_refresh_map[map_index + mb_col] == 1) 1.240 + cpi->cyclic_refresh_map[map_index + mb_col] = 0; 1.241 + } 1.242 + else 1.243 + cpi->cyclic_refresh_map[map_index + mb_col] = 1; 1.244 + 1.245 + } 1.246 + } 1.247 + 1.248 +#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING 1.249 + /* pack tokens for this MB */ 1.250 + { 1.251 + int tok_count = tp - tp_start; 1.252 + pack_tokens(w, tp_start, tok_count); 1.253 + } 1.254 +#else 1.255 + cpi->tplist[mb_row].stop = tp; 1.256 +#endif 1.257 + /* Increment pointer into gf usage flags structure. */ 1.258 + x->gf_active_ptr++; 1.259 + 1.260 + /* Increment the activity mask pointers. */ 1.261 + x->mb_activity_ptr++; 1.262 + 1.263 + /* adjust to the next column of macroblocks */ 1.264 + x->src.y_buffer += 16; 1.265 + x->src.u_buffer += 8; 1.266 + x->src.v_buffer += 8; 1.267 + 1.268 + recon_yoffset += 16; 1.269 + recon_uvoffset += 8; 1.270 + 1.271 + /* Keep track of segment usage */ 1.272 + segment_counts[xd->mode_info_context->mbmi.segment_id]++; 1.273 + 1.274 + /* skip to next mb */ 1.275 + xd->mode_info_context++; 1.276 + x->partition_info++; 1.277 + xd->above_context++; 1.278 + } 1.279 + 1.280 + vp8_extend_mb_row( &cm->yv12_fb[dst_fb_idx], 1.281 + xd->dst.y_buffer + 16, 1.282 + xd->dst.u_buffer + 8, 1.283 + xd->dst.v_buffer + 8); 1.284 + 1.285 + *current_mb_col = mb_col + nsync; 1.286 + 1.287 + /* this is to account for the border */ 1.288 + xd->mode_info_context++; 1.289 + x->partition_info++; 1.290 + 1.291 + x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols; 1.292 + x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; 1.293 + x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; 1.294 + 1.295 + xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count; 1.296 + x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count; 1.297 + x->gf_active_ptr += cm->mb_cols * cpi->encoding_thread_count; 1.298 + 1.299 + if (mb_row == cm->mb_rows - 1) 1.300 + { 1.301 + sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */ 1.302 + } 1.303 + } 1.304 + } 1.305 + } 1.306 + 1.307 + /* printf("exit thread %d\n", ithread); */ 1.308 + return 0; 1.309 +} 1.310 + 1.311 +static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) 1.312 +{ 1.313 + 1.314 + MACROBLOCK *x = mbsrc; 1.315 + MACROBLOCK *z = mbdst; 1.316 + int i; 1.317 + 1.318 + z->ss = x->ss; 1.319 + z->ss_count = x->ss_count; 1.320 + z->searches_per_step = x->searches_per_step; 1.321 + z->errorperbit = x->errorperbit; 1.322 + 1.323 + z->sadperbit16 = x->sadperbit16; 1.324 + z->sadperbit4 = x->sadperbit4; 1.325 + 1.326 + /* 1.327 + z->mv_col_min = x->mv_col_min; 1.328 + z->mv_col_max = x->mv_col_max; 1.329 + z->mv_row_min = x->mv_row_min; 1.330 + z->mv_row_max = x->mv_row_max; 1.331 + */ 1.332 + 1.333 + z->short_fdct4x4 = x->short_fdct4x4; 1.334 + z->short_fdct8x4 = x->short_fdct8x4; 1.335 + z->short_walsh4x4 = x->short_walsh4x4; 1.336 + z->quantize_b = x->quantize_b; 1.337 + z->quantize_b_pair = x->quantize_b_pair; 1.338 + z->optimize = x->optimize; 1.339 + 1.340 + /* 1.341 + z->mvc = x->mvc; 1.342 + z->src.y_buffer = x->src.y_buffer; 1.343 + z->src.u_buffer = x->src.u_buffer; 1.344 + z->src.v_buffer = x->src.v_buffer; 1.345 + */ 1.346 + 1.347 + z->mvcost[0] = x->mvcost[0]; 1.348 + z->mvcost[1] = x->mvcost[1]; 1.349 + z->mvsadcost[0] = x->mvsadcost[0]; 1.350 + z->mvsadcost[1] = x->mvsadcost[1]; 1.351 + 1.352 + z->token_costs = x->token_costs; 1.353 + z->inter_bmode_costs = x->inter_bmode_costs; 1.354 + z->mbmode_cost = x->mbmode_cost; 1.355 + z->intra_uv_mode_cost = x->intra_uv_mode_cost; 1.356 + z->bmode_costs = x->bmode_costs; 1.357 + 1.358 + for (i = 0; i < 25; i++) 1.359 + { 1.360 + z->block[i].quant = x->block[i].quant; 1.361 + z->block[i].quant_fast = x->block[i].quant_fast; 1.362 + z->block[i].quant_shift = x->block[i].quant_shift; 1.363 + z->block[i].zbin = x->block[i].zbin; 1.364 + z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost; 1.365 + z->block[i].round = x->block[i].round; 1.366 + z->block[i].src_stride = x->block[i].src_stride; 1.367 + } 1.368 + 1.369 + z->q_index = x->q_index; 1.370 + z->act_zbin_adj = x->act_zbin_adj; 1.371 + z->last_act_zbin_adj = x->last_act_zbin_adj; 1.372 + 1.373 + { 1.374 + MACROBLOCKD *xd = &x->e_mbd; 1.375 + MACROBLOCKD *zd = &z->e_mbd; 1.376 + 1.377 + /* 1.378 + zd->mode_info_context = xd->mode_info_context; 1.379 + zd->mode_info = xd->mode_info; 1.380 + 1.381 + zd->mode_info_stride = xd->mode_info_stride; 1.382 + zd->frame_type = xd->frame_type; 1.383 + zd->up_available = xd->up_available ; 1.384 + zd->left_available = xd->left_available; 1.385 + zd->left_context = xd->left_context; 1.386 + zd->last_frame_dc = xd->last_frame_dc; 1.387 + zd->last_frame_dccons = xd->last_frame_dccons; 1.388 + zd->gold_frame_dc = xd->gold_frame_dc; 1.389 + zd->gold_frame_dccons = xd->gold_frame_dccons; 1.390 + zd->mb_to_left_edge = xd->mb_to_left_edge; 1.391 + zd->mb_to_right_edge = xd->mb_to_right_edge; 1.392 + zd->mb_to_top_edge = xd->mb_to_top_edge ; 1.393 + zd->mb_to_bottom_edge = xd->mb_to_bottom_edge; 1.394 + zd->gf_active_ptr = xd->gf_active_ptr; 1.395 + zd->frames_since_golden = xd->frames_since_golden; 1.396 + zd->frames_till_alt_ref_frame = xd->frames_till_alt_ref_frame; 1.397 + */ 1.398 + zd->subpixel_predict = xd->subpixel_predict; 1.399 + zd->subpixel_predict8x4 = xd->subpixel_predict8x4; 1.400 + zd->subpixel_predict8x8 = xd->subpixel_predict8x8; 1.401 + zd->subpixel_predict16x16 = xd->subpixel_predict16x16; 1.402 + zd->segmentation_enabled = xd->segmentation_enabled; 1.403 + zd->mb_segement_abs_delta = xd->mb_segement_abs_delta; 1.404 + vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, 1.405 + sizeof(xd->segment_feature_data)); 1.406 + 1.407 + vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, 1.408 + sizeof(xd->dequant_y1_dc)); 1.409 + vpx_memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); 1.410 + vpx_memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2)); 1.411 + vpx_memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); 1.412 + 1.413 +#if 1 1.414 + /*TODO: Remove dequant from BLOCKD. This is a temporary solution until 1.415 + * the quantizer code uses a passed in pointer to the dequant constants. 1.416 + * This will also require modifications to the x86 and neon assembly. 1.417 + * */ 1.418 + for (i = 0; i < 16; i++) 1.419 + zd->block[i].dequant = zd->dequant_y1; 1.420 + for (i = 16; i < 24; i++) 1.421 + zd->block[i].dequant = zd->dequant_uv; 1.422 + zd->block[24].dequant = zd->dequant_y2; 1.423 +#endif 1.424 + 1.425 + 1.426 + vpx_memcpy(z->rd_threshes, x->rd_threshes, sizeof(x->rd_threshes)); 1.427 + vpx_memcpy(z->rd_thresh_mult, x->rd_thresh_mult, 1.428 + sizeof(x->rd_thresh_mult)); 1.429 + 1.430 + z->zbin_over_quant = x->zbin_over_quant; 1.431 + z->zbin_mode_boost_enabled = x->zbin_mode_boost_enabled; 1.432 + z->zbin_mode_boost = x->zbin_mode_boost; 1.433 + 1.434 + vpx_memset(z->error_bins, 0, sizeof(z->error_bins)); 1.435 + } 1.436 +} 1.437 + 1.438 +void vp8cx_init_mbrthread_data(VP8_COMP *cpi, 1.439 + MACROBLOCK *x, 1.440 + MB_ROW_COMP *mbr_ei, 1.441 + int count 1.442 + ) 1.443 +{ 1.444 + 1.445 + VP8_COMMON *const cm = & cpi->common; 1.446 + MACROBLOCKD *const xd = & x->e_mbd; 1.447 + int i; 1.448 + 1.449 + for (i = 0; i < count; i++) 1.450 + { 1.451 + MACROBLOCK *mb = & mbr_ei[i].mb; 1.452 + MACROBLOCKD *mbd = &mb->e_mbd; 1.453 + 1.454 + mbd->subpixel_predict = xd->subpixel_predict; 1.455 + mbd->subpixel_predict8x4 = xd->subpixel_predict8x4; 1.456 + mbd->subpixel_predict8x8 = xd->subpixel_predict8x8; 1.457 + mbd->subpixel_predict16x16 = xd->subpixel_predict16x16; 1.458 + mb->gf_active_ptr = x->gf_active_ptr; 1.459 + 1.460 + vpx_memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts)); 1.461 + mbr_ei[i].totalrate = 0; 1.462 + 1.463 + mb->partition_info = x->pi + x->e_mbd.mode_info_stride * (i + 1); 1.464 + 1.465 + mbd->mode_info_context = cm->mi + x->e_mbd.mode_info_stride * (i + 1); 1.466 + mbd->mode_info_stride = cm->mode_info_stride; 1.467 + 1.468 + mbd->frame_type = cm->frame_type; 1.469 + 1.470 + mb->src = * cpi->Source; 1.471 + mbd->pre = cm->yv12_fb[cm->lst_fb_idx]; 1.472 + mbd->dst = cm->yv12_fb[cm->new_fb_idx]; 1.473 + 1.474 + mb->src.y_buffer += 16 * x->src.y_stride * (i + 1); 1.475 + mb->src.u_buffer += 8 * x->src.uv_stride * (i + 1); 1.476 + mb->src.v_buffer += 8 * x->src.uv_stride * (i + 1); 1.477 + 1.478 + vp8_build_block_offsets(mb); 1.479 + 1.480 + mbd->left_context = &cm->left_context; 1.481 + mb->mvc = cm->fc.mvc; 1.482 + 1.483 + setup_mbby_copy(&mbr_ei[i].mb, x); 1.484 + 1.485 + mbd->fullpixel_mask = 0xffffffff; 1.486 + if(cm->full_pixel) 1.487 + mbd->fullpixel_mask = 0xfffffff8; 1.488 + 1.489 + vp8_zero(mb->coef_counts); 1.490 + vp8_zero(x->ymode_count); 1.491 + mb->skip_true_count = 0; 1.492 + vp8_zero(mb->MVcount); 1.493 + mb->prediction_error = 0; 1.494 + mb->intra_error = 0; 1.495 + vp8_zero(mb->count_mb_ref_frame_usage); 1.496 + mb->mbs_tested_so_far = 0; 1.497 + } 1.498 +} 1.499 + 1.500 +int vp8cx_create_encoder_threads(VP8_COMP *cpi) 1.501 +{ 1.502 + const VP8_COMMON * cm = &cpi->common; 1.503 + 1.504 + cpi->b_multi_threaded = 0; 1.505 + cpi->encoding_thread_count = 0; 1.506 + cpi->b_lpf_running = 0; 1.507 + 1.508 + if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) 1.509 + { 1.510 + int ithread; 1.511 + int th_count = cpi->oxcf.multi_threaded - 1; 1.512 + int rc = 0; 1.513 + 1.514 + /* don't allocate more threads than cores available */ 1.515 + if (cpi->oxcf.multi_threaded > cm->processor_core_count) 1.516 + th_count = cm->processor_core_count - 1; 1.517 + 1.518 + /* we have th_count + 1 (main) threads processing one row each */ 1.519 + /* no point to have more threads than the sync range allows */ 1.520 + if(th_count > ((cm->mb_cols / cpi->mt_sync_range) - 1)) 1.521 + { 1.522 + th_count = (cm->mb_cols / cpi->mt_sync_range) - 1; 1.523 + } 1.524 + 1.525 + if(th_count == 0) 1.526 + return 0; 1.527 + 1.528 + CHECK_MEM_ERROR(cpi->h_encoding_thread, 1.529 + vpx_malloc(sizeof(pthread_t) * th_count)); 1.530 + CHECK_MEM_ERROR(cpi->h_event_start_encoding, 1.531 + vpx_malloc(sizeof(sem_t) * th_count)); 1.532 + CHECK_MEM_ERROR(cpi->mb_row_ei, 1.533 + vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count)); 1.534 + vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count); 1.535 + CHECK_MEM_ERROR(cpi->en_thread_data, 1.536 + vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count)); 1.537 + 1.538 + sem_init(&cpi->h_event_end_encoding, 0, 0); 1.539 + 1.540 + cpi->b_multi_threaded = 1; 1.541 + cpi->encoding_thread_count = th_count; 1.542 + 1.543 + /* 1.544 + printf("[VP8:] multi_threaded encoding is enabled with %d threads\n\n", 1.545 + (cpi->encoding_thread_count +1)); 1.546 + */ 1.547 + 1.548 + for (ithread = 0; ithread < th_count; ithread++) 1.549 + { 1.550 + ENCODETHREAD_DATA *ethd = &cpi->en_thread_data[ithread]; 1.551 + 1.552 + /* Setup block ptrs and offsets */ 1.553 + vp8_setup_block_ptrs(&cpi->mb_row_ei[ithread].mb); 1.554 + vp8_setup_block_dptrs(&cpi->mb_row_ei[ithread].mb.e_mbd); 1.555 + 1.556 + sem_init(&cpi->h_event_start_encoding[ithread], 0, 0); 1.557 + 1.558 + ethd->ithread = ithread; 1.559 + ethd->ptr1 = (void *)cpi; 1.560 + ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread]; 1.561 + 1.562 + rc = pthread_create(&cpi->h_encoding_thread[ithread], 0, 1.563 + thread_encoding_proc, ethd); 1.564 + if(rc) 1.565 + break; 1.566 + } 1.567 + 1.568 + if(rc) 1.569 + { 1.570 + /* shutdown other threads */ 1.571 + cpi->b_multi_threaded = 0; 1.572 + for(--ithread; ithread >= 0; ithread--) 1.573 + { 1.574 + pthread_join(cpi->h_encoding_thread[ithread], 0); 1.575 + sem_destroy(&cpi->h_event_start_encoding[ithread]); 1.576 + } 1.577 + sem_destroy(&cpi->h_event_end_encoding); 1.578 + 1.579 + /* free thread related resources */ 1.580 + vpx_free(cpi->h_event_start_encoding); 1.581 + vpx_free(cpi->h_encoding_thread); 1.582 + vpx_free(cpi->mb_row_ei); 1.583 + vpx_free(cpi->en_thread_data); 1.584 + 1.585 + return -1; 1.586 + } 1.587 + 1.588 + 1.589 + { 1.590 + LPFTHREAD_DATA * lpfthd = &cpi->lpf_thread_data; 1.591 + 1.592 + sem_init(&cpi->h_event_start_lpf, 0, 0); 1.593 + sem_init(&cpi->h_event_end_lpf, 0, 0); 1.594 + 1.595 + lpfthd->ptr1 = (void *)cpi; 1.596 + rc = pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter, 1.597 + lpfthd); 1.598 + 1.599 + if(rc) 1.600 + { 1.601 + /* shutdown other threads */ 1.602 + cpi->b_multi_threaded = 0; 1.603 + for(--ithread; ithread >= 0; ithread--) 1.604 + { 1.605 + sem_post(&cpi->h_event_start_encoding[ithread]); 1.606 + pthread_join(cpi->h_encoding_thread[ithread], 0); 1.607 + sem_destroy(&cpi->h_event_start_encoding[ithread]); 1.608 + } 1.609 + sem_destroy(&cpi->h_event_end_encoding); 1.610 + sem_destroy(&cpi->h_event_end_lpf); 1.611 + sem_destroy(&cpi->h_event_start_lpf); 1.612 + 1.613 + /* free thread related resources */ 1.614 + vpx_free(cpi->h_event_start_encoding); 1.615 + vpx_free(cpi->h_encoding_thread); 1.616 + vpx_free(cpi->mb_row_ei); 1.617 + vpx_free(cpi->en_thread_data); 1.618 + 1.619 + return -2; 1.620 + } 1.621 + } 1.622 + } 1.623 + return 0; 1.624 +} 1.625 + 1.626 +void vp8cx_remove_encoder_threads(VP8_COMP *cpi) 1.627 +{ 1.628 + if (cpi->b_multi_threaded) 1.629 + { 1.630 + /* shutdown other threads */ 1.631 + cpi->b_multi_threaded = 0; 1.632 + { 1.633 + int i; 1.634 + 1.635 + for (i = 0; i < cpi->encoding_thread_count; i++) 1.636 + { 1.637 + sem_post(&cpi->h_event_start_encoding[i]); 1.638 + pthread_join(cpi->h_encoding_thread[i], 0); 1.639 + 1.640 + sem_destroy(&cpi->h_event_start_encoding[i]); 1.641 + } 1.642 + 1.643 + sem_post(&cpi->h_event_start_lpf); 1.644 + pthread_join(cpi->h_filter_thread, 0); 1.645 + } 1.646 + 1.647 + sem_destroy(&cpi->h_event_end_encoding); 1.648 + sem_destroy(&cpi->h_event_end_lpf); 1.649 + sem_destroy(&cpi->h_event_start_lpf); 1.650 + 1.651 + /* free thread related resources */ 1.652 + vpx_free(cpi->h_event_start_encoding); 1.653 + vpx_free(cpi->h_encoding_thread); 1.654 + vpx_free(cpi->mb_row_ei); 1.655 + vpx_free(cpi->en_thread_data); 1.656 + } 1.657 +} 1.658 +#endif