michael@0: /* michael@0: * Copyright (c) 2010 The WebM project authors. All Rights Reserved. michael@0: * michael@0: * Use of this source code is governed by a BSD-style license michael@0: * that can be found in the LICENSE file in the root of the source michael@0: * tree. An additional intellectual property rights grant can be found michael@0: * in the file PATENTS. All contributing project authors may michael@0: * be found in the AUTHORS file in the root of the source tree. michael@0: */ michael@0: michael@0: #include "onyx_int.h" michael@0: #include "vp8/common/threading.h" michael@0: #include "vp8/common/common.h" michael@0: #include "vp8/common/extend.h" michael@0: #include "bitstream.h" michael@0: #include "encodeframe.h" michael@0: michael@0: #if CONFIG_MULTITHREAD michael@0: michael@0: extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip); michael@0: michael@0: extern void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm); michael@0: michael@0: static THREAD_FUNCTION thread_loopfilter(void *p_data) michael@0: { michael@0: VP8_COMP *cpi = (VP8_COMP *)(((LPFTHREAD_DATA *)p_data)->ptr1); michael@0: VP8_COMMON *cm = &cpi->common; michael@0: michael@0: while (1) michael@0: { michael@0: if (cpi->b_multi_threaded == 0) michael@0: break; michael@0: michael@0: if (sem_wait(&cpi->h_event_start_lpf) == 0) michael@0: { michael@0: if (cpi->b_multi_threaded == 0) /* we're shutting down */ michael@0: break; michael@0: michael@0: vp8_loopfilter_frame(cpi, cm); michael@0: michael@0: sem_post(&cpi->h_event_end_lpf); michael@0: } michael@0: } michael@0: michael@0: return 0; michael@0: } michael@0: michael@0: static michael@0: THREAD_FUNCTION thread_encoding_proc(void *p_data) michael@0: { michael@0: int ithread = ((ENCODETHREAD_DATA *)p_data)->ithread; michael@0: VP8_COMP *cpi = (VP8_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr1); michael@0: MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2); michael@0: ENTROPY_CONTEXT_PLANES mb_row_left_context; michael@0: michael@0: while (1) michael@0: { michael@0: if (cpi->b_multi_threaded == 0) michael@0: break; michael@0: michael@0: if (sem_wait(&cpi->h_event_start_encoding[ithread]) == 0) michael@0: { michael@0: const int nsync = cpi->mt_sync_range; michael@0: VP8_COMMON *cm = &cpi->common; michael@0: int mb_row; michael@0: MACROBLOCK *x = &mbri->mb; michael@0: MACROBLOCKD *xd = &x->e_mbd; michael@0: TOKENEXTRA *tp ; michael@0: #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING michael@0: TOKENEXTRA *tp_start = cpi->tok + (1 + ithread) * (16 * 24); michael@0: const int num_part = (1 << cm->multi_token_partition); michael@0: #endif michael@0: michael@0: int *segment_counts = mbri->segment_counts; michael@0: int *totalrate = &mbri->totalrate; michael@0: michael@0: if (cpi->b_multi_threaded == 0) /* we're shutting down */ michael@0: break; michael@0: michael@0: for (mb_row = ithread + 1; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1)) michael@0: { michael@0: michael@0: int recon_yoffset, recon_uvoffset; michael@0: int mb_col; michael@0: int ref_fb_idx = cm->lst_fb_idx; michael@0: int dst_fb_idx = cm->new_fb_idx; michael@0: int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; michael@0: int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; michael@0: int map_index = (mb_row * cm->mb_cols); michael@0: volatile const int *last_row_current_mb_col; michael@0: volatile int *current_mb_col = &cpi->mt_current_mb_col[mb_row]; michael@0: michael@0: #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) michael@0: vp8_writer *w = &cpi->bc[1 + (mb_row % num_part)]; michael@0: #else michael@0: tp = cpi->tok + (mb_row * (cm->mb_cols * 16 * 24)); michael@0: cpi->tplist[mb_row].start = tp; michael@0: #endif michael@0: michael@0: last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1]; michael@0: michael@0: /* reset above block coeffs */ michael@0: xd->above_context = cm->above_context; michael@0: xd->left_context = &mb_row_left_context; michael@0: michael@0: vp8_zero(mb_row_left_context); michael@0: michael@0: xd->up_available = (mb_row != 0); michael@0: recon_yoffset = (mb_row * recon_y_stride * 16); michael@0: recon_uvoffset = (mb_row * recon_uv_stride * 8); michael@0: michael@0: /* Set the mb activity pointer to the start of the row. */ michael@0: x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; michael@0: michael@0: /* for each macroblock col in image */ michael@0: for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) michael@0: { michael@0: *current_mb_col = mb_col - 1; michael@0: michael@0: if ((mb_col & (nsync - 1)) == 0) michael@0: { michael@0: while (mb_col > (*last_row_current_mb_col - nsync)) michael@0: { michael@0: x86_pause_hint(); michael@0: thread_sleep(0); michael@0: } michael@0: } michael@0: michael@0: #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING michael@0: tp = tp_start; michael@0: #endif michael@0: michael@0: /* Distance of Mb to the various image edges. michael@0: * These specified to 8th pel as they are always compared michael@0: * to values that are in 1/8th pel units michael@0: */ michael@0: xd->mb_to_left_edge = -((mb_col * 16) << 3); michael@0: xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; michael@0: xd->mb_to_top_edge = -((mb_row * 16) << 3); michael@0: xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; michael@0: michael@0: /* Set up limit values for motion vectors used to prevent michael@0: * them extending outside the UMV borders michael@0: */ michael@0: x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); michael@0: x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); michael@0: x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); michael@0: x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); michael@0: michael@0: xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; michael@0: xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; michael@0: xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; michael@0: xd->left_available = (mb_col != 0); michael@0: michael@0: x->rddiv = cpi->RDDIV; michael@0: x->rdmult = cpi->RDMULT; michael@0: michael@0: /* Copy current mb to a buffer */ michael@0: vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); michael@0: michael@0: if (cpi->oxcf.tuning == VP8_TUNE_SSIM) michael@0: vp8_activity_masking(cpi, x); michael@0: michael@0: /* Is segmentation enabled */ michael@0: /* MB level adjustment to quantizer */ michael@0: if (xd->segmentation_enabled) michael@0: { michael@0: /* Code to set segment id in xd->mbmi.segment_id for michael@0: * current MB (with range checking) michael@0: */ michael@0: if (cpi->segmentation_map[map_index + mb_col] <= 3) michael@0: xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index + mb_col]; michael@0: else michael@0: xd->mode_info_context->mbmi.segment_id = 0; michael@0: michael@0: vp8cx_mb_init_quantizer(cpi, x, 1); michael@0: } michael@0: else michael@0: /* Set to Segment 0 by default */ michael@0: xd->mode_info_context->mbmi.segment_id = 0; michael@0: michael@0: x->active_ptr = cpi->active_map + map_index + mb_col; michael@0: michael@0: if (cm->frame_type == KEY_FRAME) michael@0: { michael@0: *totalrate += vp8cx_encode_intra_macroblock(cpi, x, &tp); michael@0: #ifdef MODE_STATS michael@0: y_modes[xd->mbmi.mode] ++; michael@0: #endif michael@0: } michael@0: else michael@0: { michael@0: *totalrate += vp8cx_encode_inter_macroblock(cpi, x, &tp, recon_yoffset, recon_uvoffset, mb_row, mb_col); michael@0: michael@0: #ifdef MODE_STATS michael@0: inter_y_modes[xd->mbmi.mode] ++; michael@0: michael@0: if (xd->mbmi.mode == SPLITMV) michael@0: { michael@0: int b; michael@0: michael@0: for (b = 0; b < xd->mbmi.partition_count; b++) michael@0: { michael@0: inter_b_modes[x->partition->bmi[b].mode] ++; michael@0: } michael@0: } michael@0: michael@0: #endif michael@0: michael@0: /* Special case code for cyclic refresh michael@0: * If cyclic update enabled then copy michael@0: * xd->mbmi.segment_id; (which may have been updated michael@0: * based on mode during michael@0: * vp8cx_encode_inter_macroblock()) back into the michael@0: * global segmentation map michael@0: */ michael@0: if ((cpi->current_layer == 0) && michael@0: (cpi->cyclic_refresh_mode_enabled && michael@0: xd->segmentation_enabled)) michael@0: { michael@0: const MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; michael@0: cpi->segmentation_map[map_index + mb_col] = mbmi->segment_id; michael@0: michael@0: /* If the block has been refreshed mark it as clean michael@0: * (the magnitude of the -ve influences how long it michael@0: * will be before we consider another refresh): michael@0: * Else if it was coded (last frame 0,0) and has michael@0: * not already been refreshed then mark it as a michael@0: * candidate for cleanup next time (marked 0) else michael@0: * mark it as dirty (1). michael@0: */ michael@0: if (mbmi->segment_id) michael@0: cpi->cyclic_refresh_map[map_index + mb_col] = -1; michael@0: else if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME)) michael@0: { michael@0: if (cpi->cyclic_refresh_map[map_index + mb_col] == 1) michael@0: cpi->cyclic_refresh_map[map_index + mb_col] = 0; michael@0: } michael@0: else michael@0: cpi->cyclic_refresh_map[map_index + mb_col] = 1; michael@0: michael@0: } michael@0: } michael@0: michael@0: #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING michael@0: /* pack tokens for this MB */ michael@0: { michael@0: int tok_count = tp - tp_start; michael@0: pack_tokens(w, tp_start, tok_count); michael@0: } michael@0: #else michael@0: cpi->tplist[mb_row].stop = tp; michael@0: #endif michael@0: /* Increment pointer into gf usage flags structure. */ michael@0: x->gf_active_ptr++; michael@0: michael@0: /* Increment the activity mask pointers. */ michael@0: x->mb_activity_ptr++; michael@0: michael@0: /* adjust to the next column of macroblocks */ michael@0: x->src.y_buffer += 16; michael@0: x->src.u_buffer += 8; michael@0: x->src.v_buffer += 8; michael@0: michael@0: recon_yoffset += 16; michael@0: recon_uvoffset += 8; michael@0: michael@0: /* Keep track of segment usage */ michael@0: segment_counts[xd->mode_info_context->mbmi.segment_id]++; michael@0: michael@0: /* skip to next mb */ michael@0: xd->mode_info_context++; michael@0: x->partition_info++; michael@0: xd->above_context++; michael@0: } michael@0: michael@0: vp8_extend_mb_row( &cm->yv12_fb[dst_fb_idx], michael@0: xd->dst.y_buffer + 16, michael@0: xd->dst.u_buffer + 8, michael@0: xd->dst.v_buffer + 8); michael@0: michael@0: *current_mb_col = mb_col + nsync; michael@0: michael@0: /* this is to account for the border */ michael@0: xd->mode_info_context++; michael@0: x->partition_info++; michael@0: michael@0: x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols; michael@0: x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; michael@0: x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; michael@0: michael@0: xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count; michael@0: x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count; michael@0: x->gf_active_ptr += cm->mb_cols * cpi->encoding_thread_count; michael@0: michael@0: if (mb_row == cm->mb_rows - 1) michael@0: { michael@0: sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */ michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* printf("exit thread %d\n", ithread); */ michael@0: return 0; michael@0: } michael@0: michael@0: static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) michael@0: { michael@0: michael@0: MACROBLOCK *x = mbsrc; michael@0: MACROBLOCK *z = mbdst; michael@0: int i; michael@0: michael@0: z->ss = x->ss; michael@0: z->ss_count = x->ss_count; michael@0: z->searches_per_step = x->searches_per_step; michael@0: z->errorperbit = x->errorperbit; michael@0: michael@0: z->sadperbit16 = x->sadperbit16; michael@0: z->sadperbit4 = x->sadperbit4; michael@0: michael@0: /* michael@0: z->mv_col_min = x->mv_col_min; michael@0: z->mv_col_max = x->mv_col_max; michael@0: z->mv_row_min = x->mv_row_min; michael@0: z->mv_row_max = x->mv_row_max; michael@0: */ michael@0: michael@0: z->short_fdct4x4 = x->short_fdct4x4; michael@0: z->short_fdct8x4 = x->short_fdct8x4; michael@0: z->short_walsh4x4 = x->short_walsh4x4; michael@0: z->quantize_b = x->quantize_b; michael@0: z->quantize_b_pair = x->quantize_b_pair; michael@0: z->optimize = x->optimize; michael@0: michael@0: /* michael@0: z->mvc = x->mvc; michael@0: z->src.y_buffer = x->src.y_buffer; michael@0: z->src.u_buffer = x->src.u_buffer; michael@0: z->src.v_buffer = x->src.v_buffer; michael@0: */ michael@0: michael@0: z->mvcost[0] = x->mvcost[0]; michael@0: z->mvcost[1] = x->mvcost[1]; michael@0: z->mvsadcost[0] = x->mvsadcost[0]; michael@0: z->mvsadcost[1] = x->mvsadcost[1]; michael@0: michael@0: z->token_costs = x->token_costs; michael@0: z->inter_bmode_costs = x->inter_bmode_costs; michael@0: z->mbmode_cost = x->mbmode_cost; michael@0: z->intra_uv_mode_cost = x->intra_uv_mode_cost; michael@0: z->bmode_costs = x->bmode_costs; michael@0: michael@0: for (i = 0; i < 25; i++) michael@0: { michael@0: z->block[i].quant = x->block[i].quant; michael@0: z->block[i].quant_fast = x->block[i].quant_fast; michael@0: z->block[i].quant_shift = x->block[i].quant_shift; michael@0: z->block[i].zbin = x->block[i].zbin; michael@0: z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost; michael@0: z->block[i].round = x->block[i].round; michael@0: z->block[i].src_stride = x->block[i].src_stride; michael@0: } michael@0: michael@0: z->q_index = x->q_index; michael@0: z->act_zbin_adj = x->act_zbin_adj; michael@0: z->last_act_zbin_adj = x->last_act_zbin_adj; michael@0: michael@0: { michael@0: MACROBLOCKD *xd = &x->e_mbd; michael@0: MACROBLOCKD *zd = &z->e_mbd; michael@0: michael@0: /* michael@0: zd->mode_info_context = xd->mode_info_context; michael@0: zd->mode_info = xd->mode_info; michael@0: michael@0: zd->mode_info_stride = xd->mode_info_stride; michael@0: zd->frame_type = xd->frame_type; michael@0: zd->up_available = xd->up_available ; michael@0: zd->left_available = xd->left_available; michael@0: zd->left_context = xd->left_context; michael@0: zd->last_frame_dc = xd->last_frame_dc; michael@0: zd->last_frame_dccons = xd->last_frame_dccons; michael@0: zd->gold_frame_dc = xd->gold_frame_dc; michael@0: zd->gold_frame_dccons = xd->gold_frame_dccons; michael@0: zd->mb_to_left_edge = xd->mb_to_left_edge; michael@0: zd->mb_to_right_edge = xd->mb_to_right_edge; michael@0: zd->mb_to_top_edge = xd->mb_to_top_edge ; michael@0: zd->mb_to_bottom_edge = xd->mb_to_bottom_edge; michael@0: zd->gf_active_ptr = xd->gf_active_ptr; michael@0: zd->frames_since_golden = xd->frames_since_golden; michael@0: zd->frames_till_alt_ref_frame = xd->frames_till_alt_ref_frame; michael@0: */ michael@0: zd->subpixel_predict = xd->subpixel_predict; michael@0: zd->subpixel_predict8x4 = xd->subpixel_predict8x4; michael@0: zd->subpixel_predict8x8 = xd->subpixel_predict8x8; michael@0: zd->subpixel_predict16x16 = xd->subpixel_predict16x16; michael@0: zd->segmentation_enabled = xd->segmentation_enabled; michael@0: zd->mb_segement_abs_delta = xd->mb_segement_abs_delta; michael@0: vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, michael@0: sizeof(xd->segment_feature_data)); michael@0: michael@0: vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, michael@0: sizeof(xd->dequant_y1_dc)); michael@0: vpx_memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); michael@0: vpx_memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2)); michael@0: vpx_memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); michael@0: michael@0: #if 1 michael@0: /*TODO: Remove dequant from BLOCKD. This is a temporary solution until michael@0: * the quantizer code uses a passed in pointer to the dequant constants. michael@0: * This will also require modifications to the x86 and neon assembly. michael@0: * */ michael@0: for (i = 0; i < 16; i++) michael@0: zd->block[i].dequant = zd->dequant_y1; michael@0: for (i = 16; i < 24; i++) michael@0: zd->block[i].dequant = zd->dequant_uv; michael@0: zd->block[24].dequant = zd->dequant_y2; michael@0: #endif michael@0: michael@0: michael@0: vpx_memcpy(z->rd_threshes, x->rd_threshes, sizeof(x->rd_threshes)); michael@0: vpx_memcpy(z->rd_thresh_mult, x->rd_thresh_mult, michael@0: sizeof(x->rd_thresh_mult)); michael@0: michael@0: z->zbin_over_quant = x->zbin_over_quant; michael@0: z->zbin_mode_boost_enabled = x->zbin_mode_boost_enabled; michael@0: z->zbin_mode_boost = x->zbin_mode_boost; michael@0: michael@0: vpx_memset(z->error_bins, 0, sizeof(z->error_bins)); michael@0: } michael@0: } michael@0: michael@0: void vp8cx_init_mbrthread_data(VP8_COMP *cpi, michael@0: MACROBLOCK *x, michael@0: MB_ROW_COMP *mbr_ei, michael@0: int count michael@0: ) michael@0: { michael@0: michael@0: VP8_COMMON *const cm = & cpi->common; michael@0: MACROBLOCKD *const xd = & x->e_mbd; michael@0: int i; michael@0: michael@0: for (i = 0; i < count; i++) michael@0: { michael@0: MACROBLOCK *mb = & mbr_ei[i].mb; michael@0: MACROBLOCKD *mbd = &mb->e_mbd; michael@0: michael@0: mbd->subpixel_predict = xd->subpixel_predict; michael@0: mbd->subpixel_predict8x4 = xd->subpixel_predict8x4; michael@0: mbd->subpixel_predict8x8 = xd->subpixel_predict8x8; michael@0: mbd->subpixel_predict16x16 = xd->subpixel_predict16x16; michael@0: mb->gf_active_ptr = x->gf_active_ptr; michael@0: michael@0: vpx_memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts)); michael@0: mbr_ei[i].totalrate = 0; michael@0: michael@0: mb->partition_info = x->pi + x->e_mbd.mode_info_stride * (i + 1); michael@0: michael@0: mbd->mode_info_context = cm->mi + x->e_mbd.mode_info_stride * (i + 1); michael@0: mbd->mode_info_stride = cm->mode_info_stride; michael@0: michael@0: mbd->frame_type = cm->frame_type; michael@0: michael@0: mb->src = * cpi->Source; michael@0: mbd->pre = cm->yv12_fb[cm->lst_fb_idx]; michael@0: mbd->dst = cm->yv12_fb[cm->new_fb_idx]; michael@0: michael@0: mb->src.y_buffer += 16 * x->src.y_stride * (i + 1); michael@0: mb->src.u_buffer += 8 * x->src.uv_stride * (i + 1); michael@0: mb->src.v_buffer += 8 * x->src.uv_stride * (i + 1); michael@0: michael@0: vp8_build_block_offsets(mb); michael@0: michael@0: mbd->left_context = &cm->left_context; michael@0: mb->mvc = cm->fc.mvc; michael@0: michael@0: setup_mbby_copy(&mbr_ei[i].mb, x); michael@0: michael@0: mbd->fullpixel_mask = 0xffffffff; michael@0: if(cm->full_pixel) michael@0: mbd->fullpixel_mask = 0xfffffff8; michael@0: michael@0: vp8_zero(mb->coef_counts); michael@0: vp8_zero(x->ymode_count); michael@0: mb->skip_true_count = 0; michael@0: vp8_zero(mb->MVcount); michael@0: mb->prediction_error = 0; michael@0: mb->intra_error = 0; michael@0: vp8_zero(mb->count_mb_ref_frame_usage); michael@0: mb->mbs_tested_so_far = 0; michael@0: } michael@0: } michael@0: michael@0: int vp8cx_create_encoder_threads(VP8_COMP *cpi) michael@0: { michael@0: const VP8_COMMON * cm = &cpi->common; michael@0: michael@0: cpi->b_multi_threaded = 0; michael@0: cpi->encoding_thread_count = 0; michael@0: cpi->b_lpf_running = 0; michael@0: michael@0: if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) michael@0: { michael@0: int ithread; michael@0: int th_count = cpi->oxcf.multi_threaded - 1; michael@0: int rc = 0; michael@0: michael@0: /* don't allocate more threads than cores available */ michael@0: if (cpi->oxcf.multi_threaded > cm->processor_core_count) michael@0: th_count = cm->processor_core_count - 1; michael@0: michael@0: /* we have th_count + 1 (main) threads processing one row each */ michael@0: /* no point to have more threads than the sync range allows */ michael@0: if(th_count > ((cm->mb_cols / cpi->mt_sync_range) - 1)) michael@0: { michael@0: th_count = (cm->mb_cols / cpi->mt_sync_range) - 1; michael@0: } michael@0: michael@0: if(th_count == 0) michael@0: return 0; michael@0: michael@0: CHECK_MEM_ERROR(cpi->h_encoding_thread, michael@0: vpx_malloc(sizeof(pthread_t) * th_count)); michael@0: CHECK_MEM_ERROR(cpi->h_event_start_encoding, michael@0: vpx_malloc(sizeof(sem_t) * th_count)); michael@0: CHECK_MEM_ERROR(cpi->mb_row_ei, michael@0: vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count)); michael@0: vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count); michael@0: CHECK_MEM_ERROR(cpi->en_thread_data, michael@0: vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count)); michael@0: michael@0: sem_init(&cpi->h_event_end_encoding, 0, 0); michael@0: michael@0: cpi->b_multi_threaded = 1; michael@0: cpi->encoding_thread_count = th_count; michael@0: michael@0: /* michael@0: printf("[VP8:] multi_threaded encoding is enabled with %d threads\n\n", michael@0: (cpi->encoding_thread_count +1)); michael@0: */ michael@0: michael@0: for (ithread = 0; ithread < th_count; ithread++) michael@0: { michael@0: ENCODETHREAD_DATA *ethd = &cpi->en_thread_data[ithread]; michael@0: michael@0: /* Setup block ptrs and offsets */ michael@0: vp8_setup_block_ptrs(&cpi->mb_row_ei[ithread].mb); michael@0: vp8_setup_block_dptrs(&cpi->mb_row_ei[ithread].mb.e_mbd); michael@0: michael@0: sem_init(&cpi->h_event_start_encoding[ithread], 0, 0); michael@0: michael@0: ethd->ithread = ithread; michael@0: ethd->ptr1 = (void *)cpi; michael@0: ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread]; michael@0: michael@0: rc = pthread_create(&cpi->h_encoding_thread[ithread], 0, michael@0: thread_encoding_proc, ethd); michael@0: if(rc) michael@0: break; michael@0: } michael@0: michael@0: if(rc) michael@0: { michael@0: /* shutdown other threads */ michael@0: cpi->b_multi_threaded = 0; michael@0: for(--ithread; ithread >= 0; ithread--) michael@0: { michael@0: pthread_join(cpi->h_encoding_thread[ithread], 0); michael@0: sem_destroy(&cpi->h_event_start_encoding[ithread]); michael@0: } michael@0: sem_destroy(&cpi->h_event_end_encoding); michael@0: michael@0: /* free thread related resources */ michael@0: vpx_free(cpi->h_event_start_encoding); michael@0: vpx_free(cpi->h_encoding_thread); michael@0: vpx_free(cpi->mb_row_ei); michael@0: vpx_free(cpi->en_thread_data); michael@0: michael@0: return -1; michael@0: } michael@0: michael@0: michael@0: { michael@0: LPFTHREAD_DATA * lpfthd = &cpi->lpf_thread_data; michael@0: michael@0: sem_init(&cpi->h_event_start_lpf, 0, 0); michael@0: sem_init(&cpi->h_event_end_lpf, 0, 0); michael@0: michael@0: lpfthd->ptr1 = (void *)cpi; michael@0: rc = pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter, michael@0: lpfthd); michael@0: michael@0: if(rc) michael@0: { michael@0: /* shutdown other threads */ michael@0: cpi->b_multi_threaded = 0; michael@0: for(--ithread; ithread >= 0; ithread--) michael@0: { michael@0: sem_post(&cpi->h_event_start_encoding[ithread]); michael@0: pthread_join(cpi->h_encoding_thread[ithread], 0); michael@0: sem_destroy(&cpi->h_event_start_encoding[ithread]); michael@0: } michael@0: sem_destroy(&cpi->h_event_end_encoding); michael@0: sem_destroy(&cpi->h_event_end_lpf); michael@0: sem_destroy(&cpi->h_event_start_lpf); michael@0: michael@0: /* free thread related resources */ michael@0: vpx_free(cpi->h_event_start_encoding); michael@0: vpx_free(cpi->h_encoding_thread); michael@0: vpx_free(cpi->mb_row_ei); michael@0: vpx_free(cpi->en_thread_data); michael@0: michael@0: return -2; michael@0: } michael@0: } michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: void vp8cx_remove_encoder_threads(VP8_COMP *cpi) michael@0: { michael@0: if (cpi->b_multi_threaded) michael@0: { michael@0: /* shutdown other threads */ michael@0: cpi->b_multi_threaded = 0; michael@0: { michael@0: int i; michael@0: michael@0: for (i = 0; i < cpi->encoding_thread_count; i++) michael@0: { michael@0: sem_post(&cpi->h_event_start_encoding[i]); michael@0: pthread_join(cpi->h_encoding_thread[i], 0); michael@0: michael@0: sem_destroy(&cpi->h_event_start_encoding[i]); michael@0: } michael@0: michael@0: sem_post(&cpi->h_event_start_lpf); michael@0: pthread_join(cpi->h_filter_thread, 0); michael@0: } michael@0: michael@0: sem_destroy(&cpi->h_event_end_encoding); michael@0: sem_destroy(&cpi->h_event_end_lpf); michael@0: sem_destroy(&cpi->h_event_start_lpf); michael@0: michael@0: /* free thread related resources */ michael@0: vpx_free(cpi->h_event_start_encoding); michael@0: vpx_free(cpi->h_encoding_thread); michael@0: vpx_free(cpi->mb_row_ei); michael@0: vpx_free(cpi->en_thread_data); michael@0: } michael@0: } michael@0: #endif