michael@0: /******************************************************************** michael@0: * * michael@0: * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * michael@0: * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * michael@0: * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * michael@0: * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * michael@0: * * michael@0: * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010 * michael@0: * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * michael@0: * * michael@0: ******************************************************************** michael@0: michael@0: function: michael@0: last mod: $Id: x86state.c 17344 2010-07-21 01:42:18Z tterribe $ michael@0: michael@0: ********************************************************************/ michael@0: #include "armint.h" michael@0: michael@0: #if defined(OC_ARM_ASM) michael@0: michael@0: # if defined(OC_ARM_ASM_NEON) michael@0: /*This table has been modified from OC_FZIG_ZAG by baking an 8x8 transpose into michael@0: the destination.*/ michael@0: static const unsigned char OC_FZIG_ZAG_NEON[128]={ michael@0: 0, 8, 1, 2, 9,16,24,17, michael@0: 10, 3, 4,11,18,25,32,40, michael@0: 33,26,19,12, 5, 6,13,20, michael@0: 27,34,41,48,56,49,42,35, michael@0: 28,21,14, 7,15,22,29,36, michael@0: 43,50,57,58,51,44,37,30, michael@0: 23,31,38,45,52,59,60,53, michael@0: 46,39,47,54,61,62,55,63, michael@0: 64,64,64,64,64,64,64,64, michael@0: 64,64,64,64,64,64,64,64, michael@0: 64,64,64,64,64,64,64,64, michael@0: 64,64,64,64,64,64,64,64, michael@0: 64,64,64,64,64,64,64,64, michael@0: 64,64,64,64,64,64,64,64, michael@0: 64,64,64,64,64,64,64,64, michael@0: 64,64,64,64,64,64,64,64 michael@0: }; michael@0: # endif michael@0: michael@0: void oc_state_accel_init_arm(oc_theora_state *_state){ michael@0: oc_state_accel_init_c(_state); michael@0: _state->cpu_flags=oc_cpu_flags_get(); michael@0: # if defined(OC_STATE_USE_VTABLE) michael@0: _state->opt_vtable.frag_copy_list=oc_frag_copy_list_arm; michael@0: _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_arm; michael@0: _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_arm; michael@0: _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_arm; michael@0: _state->opt_vtable.idct8x8=oc_idct8x8_arm; michael@0: _state->opt_vtable.state_frag_recon=oc_state_frag_recon_arm; michael@0: /*Note: We _must_ set this function pointer, because the macro in armint.h michael@0: calls it with different arguments, so the C version will segfault.*/ michael@0: _state->opt_vtable.state_loop_filter_frag_rows= michael@0: (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_arm; michael@0: # endif michael@0: # if defined(OC_ARM_ASM_EDSP) michael@0: if(_state->cpu_flags&OC_CPU_ARM_EDSP){ michael@0: # if defined(OC_STATE_USE_VTABLE) michael@0: _state->opt_vtable.frag_copy_list=oc_frag_copy_list_edsp; michael@0: # endif michael@0: } michael@0: # if defined(OC_ARM_ASM_MEDIA) michael@0: if(_state->cpu_flags&OC_CPU_ARM_MEDIA){ michael@0: # if defined(OC_STATE_USE_VTABLE) michael@0: _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_v6; michael@0: _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_v6; michael@0: _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_v6; michael@0: _state->opt_vtable.idct8x8=oc_idct8x8_v6; michael@0: _state->opt_vtable.state_frag_recon=oc_state_frag_recon_v6; michael@0: _state->opt_vtable.loop_filter_init=oc_loop_filter_init_v6; michael@0: _state->opt_vtable.state_loop_filter_frag_rows= michael@0: (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_v6; michael@0: # endif michael@0: } michael@0: # if defined(OC_ARM_ASM_NEON) michael@0: if(_state->cpu_flags&OC_CPU_ARM_NEON){ michael@0: # if defined(OC_STATE_USE_VTABLE) michael@0: _state->opt_vtable.frag_copy_list=oc_frag_copy_list_neon; michael@0: _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_neon; michael@0: _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_neon; michael@0: _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_neon; michael@0: _state->opt_vtable.state_frag_recon=oc_state_frag_recon_neon; michael@0: _state->opt_vtable.loop_filter_init=oc_loop_filter_init_neon; michael@0: _state->opt_vtable.state_loop_filter_frag_rows= michael@0: (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_neon; michael@0: _state->opt_vtable.idct8x8=oc_idct8x8_neon; michael@0: # endif michael@0: _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_NEON; michael@0: } michael@0: # endif michael@0: # endif michael@0: # endif michael@0: } michael@0: michael@0: void oc_state_frag_recon_arm(const oc_theora_state *_state,ptrdiff_t _fragi, michael@0: int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){ michael@0: unsigned char *dst; michael@0: ptrdiff_t frag_buf_off; michael@0: int ystride; michael@0: int refi; michael@0: /*Apply the inverse transform.*/ michael@0: /*Special case only having a DC component.*/ michael@0: if(_last_zzi<2){ michael@0: ogg_uint16_t p; michael@0: /*We round this dequant product (and not any of the others) because there's michael@0: no iDCT rounding.*/ michael@0: p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); michael@0: oc_idct8x8_1_arm(_dct_coeffs+64,p); michael@0: } michael@0: else{ michael@0: /*First, dequantize the DC coefficient.*/ michael@0: _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); michael@0: oc_idct8x8_arm(_dct_coeffs+64,_dct_coeffs,_last_zzi); michael@0: } michael@0: /*Fill in the target buffer.*/ michael@0: frag_buf_off=_state->frag_buf_offs[_fragi]; michael@0: refi=_state->frags[_fragi].refi; michael@0: ystride=_state->ref_ystride[_pli]; michael@0: dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off; michael@0: if(refi==OC_FRAME_SELF)oc_frag_recon_intra_arm(dst,ystride,_dct_coeffs+64); michael@0: else{ michael@0: const unsigned char *ref; michael@0: int mvoffsets[2]; michael@0: ref=_state->ref_frame_data[refi]+frag_buf_off; michael@0: if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, michael@0: _state->frag_mvs[_fragi])>1){ michael@0: oc_frag_recon_inter2_arm(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, michael@0: _dct_coeffs+64); michael@0: } michael@0: else oc_frag_recon_inter_arm(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64); michael@0: } michael@0: } michael@0: michael@0: # if defined(OC_ARM_ASM_MEDIA) michael@0: void oc_state_frag_recon_v6(const oc_theora_state *_state,ptrdiff_t _fragi, michael@0: int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){ michael@0: unsigned char *dst; michael@0: ptrdiff_t frag_buf_off; michael@0: int ystride; michael@0: int refi; michael@0: /*Apply the inverse transform.*/ michael@0: /*Special case only having a DC component.*/ michael@0: if(_last_zzi<2){ michael@0: ogg_uint16_t p; michael@0: /*We round this dequant product (and not any of the others) because there's michael@0: no iDCT rounding.*/ michael@0: p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); michael@0: oc_idct8x8_1_v6(_dct_coeffs+64,p); michael@0: } michael@0: else{ michael@0: /*First, dequantize the DC coefficient.*/ michael@0: _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); michael@0: oc_idct8x8_v6(_dct_coeffs+64,_dct_coeffs,_last_zzi); michael@0: } michael@0: /*Fill in the target buffer.*/ michael@0: frag_buf_off=_state->frag_buf_offs[_fragi]; michael@0: refi=_state->frags[_fragi].refi; michael@0: ystride=_state->ref_ystride[_pli]; michael@0: dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off; michael@0: if(refi==OC_FRAME_SELF)oc_frag_recon_intra_v6(dst,ystride,_dct_coeffs+64); michael@0: else{ michael@0: const unsigned char *ref; michael@0: int mvoffsets[2]; michael@0: ref=_state->ref_frame_data[refi]+frag_buf_off; michael@0: if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, michael@0: _state->frag_mvs[_fragi])>1){ michael@0: oc_frag_recon_inter2_v6(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, michael@0: _dct_coeffs+64); michael@0: } michael@0: else oc_frag_recon_inter_v6(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64); michael@0: } michael@0: } michael@0: michael@0: # if defined(OC_ARM_ASM_NEON) michael@0: void oc_state_frag_recon_neon(const oc_theora_state *_state,ptrdiff_t _fragi, michael@0: int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){ michael@0: unsigned char *dst; michael@0: ptrdiff_t frag_buf_off; michael@0: int ystride; michael@0: int refi; michael@0: /*Apply the inverse transform.*/ michael@0: /*Special case only having a DC component.*/ michael@0: if(_last_zzi<2){ michael@0: ogg_uint16_t p; michael@0: /*We round this dequant product (and not any of the others) because there's michael@0: no iDCT rounding.*/ michael@0: p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); michael@0: oc_idct8x8_1_neon(_dct_coeffs+64,p); michael@0: } michael@0: else{ michael@0: /*First, dequantize the DC coefficient.*/ michael@0: _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); michael@0: oc_idct8x8_neon(_dct_coeffs+64,_dct_coeffs,_last_zzi); michael@0: } michael@0: /*Fill in the target buffer.*/ michael@0: frag_buf_off=_state->frag_buf_offs[_fragi]; michael@0: refi=_state->frags[_fragi].refi; michael@0: ystride=_state->ref_ystride[_pli]; michael@0: dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off; michael@0: if(refi==OC_FRAME_SELF)oc_frag_recon_intra_neon(dst,ystride,_dct_coeffs+64); michael@0: else{ michael@0: const unsigned char *ref; michael@0: int mvoffsets[2]; michael@0: ref=_state->ref_frame_data[refi]+frag_buf_off; michael@0: if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, michael@0: _state->frag_mvs[_fragi])>1){ michael@0: oc_frag_recon_inter2_neon(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, michael@0: _dct_coeffs+64); michael@0: } michael@0: else oc_frag_recon_inter_neon(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64); michael@0: } michael@0: } michael@0: # endif michael@0: # endif michael@0: michael@0: #endif