1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libtheora/lib/arm/armstate.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,219 @@ 1.4 +/******************************************************************** 1.5 + * * 1.6 + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * 1.7 + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * 1.8 + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * 1.9 + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * 1.10 + * * 1.11 + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010 * 1.12 + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * 1.13 + * * 1.14 + ******************************************************************** 1.15 + 1.16 + function: 1.17 + last mod: $Id: x86state.c 17344 2010-07-21 01:42:18Z tterribe $ 1.18 + 1.19 + ********************************************************************/ 1.20 +#include "armint.h" 1.21 + 1.22 +#if defined(OC_ARM_ASM) 1.23 + 1.24 +# if defined(OC_ARM_ASM_NEON) 1.25 +/*This table has been modified from OC_FZIG_ZAG by baking an 8x8 transpose into 1.26 + the destination.*/ 1.27 +static const unsigned char OC_FZIG_ZAG_NEON[128]={ 1.28 + 0, 8, 1, 2, 9,16,24,17, 1.29 + 10, 3, 4,11,18,25,32,40, 1.30 + 33,26,19,12, 5, 6,13,20, 1.31 + 27,34,41,48,56,49,42,35, 1.32 + 28,21,14, 7,15,22,29,36, 1.33 + 43,50,57,58,51,44,37,30, 1.34 + 23,31,38,45,52,59,60,53, 1.35 + 46,39,47,54,61,62,55,63, 1.36 + 64,64,64,64,64,64,64,64, 1.37 + 64,64,64,64,64,64,64,64, 1.38 + 64,64,64,64,64,64,64,64, 1.39 + 64,64,64,64,64,64,64,64, 1.40 + 64,64,64,64,64,64,64,64, 1.41 + 64,64,64,64,64,64,64,64, 1.42 + 64,64,64,64,64,64,64,64, 1.43 + 64,64,64,64,64,64,64,64 1.44 +}; 1.45 +# endif 1.46 + 1.47 +void oc_state_accel_init_arm(oc_theora_state *_state){ 1.48 + oc_state_accel_init_c(_state); 1.49 + _state->cpu_flags=oc_cpu_flags_get(); 1.50 +# if defined(OC_STATE_USE_VTABLE) 1.51 + _state->opt_vtable.frag_copy_list=oc_frag_copy_list_arm; 1.52 + _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_arm; 1.53 + _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_arm; 1.54 + _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_arm; 1.55 + _state->opt_vtable.idct8x8=oc_idct8x8_arm; 1.56 + _state->opt_vtable.state_frag_recon=oc_state_frag_recon_arm; 1.57 + /*Note: We _must_ set this function pointer, because the macro in armint.h 1.58 + calls it with different arguments, so the C version will segfault.*/ 1.59 + _state->opt_vtable.state_loop_filter_frag_rows= 1.60 + (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_arm; 1.61 +# endif 1.62 +# if defined(OC_ARM_ASM_EDSP) 1.63 + if(_state->cpu_flags&OC_CPU_ARM_EDSP){ 1.64 +# if defined(OC_STATE_USE_VTABLE) 1.65 + _state->opt_vtable.frag_copy_list=oc_frag_copy_list_edsp; 1.66 +# endif 1.67 + } 1.68 +# if defined(OC_ARM_ASM_MEDIA) 1.69 + if(_state->cpu_flags&OC_CPU_ARM_MEDIA){ 1.70 +# if defined(OC_STATE_USE_VTABLE) 1.71 + _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_v6; 1.72 + _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_v6; 1.73 + _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_v6; 1.74 + _state->opt_vtable.idct8x8=oc_idct8x8_v6; 1.75 + _state->opt_vtable.state_frag_recon=oc_state_frag_recon_v6; 1.76 + _state->opt_vtable.loop_filter_init=oc_loop_filter_init_v6; 1.77 + _state->opt_vtable.state_loop_filter_frag_rows= 1.78 + (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_v6; 1.79 +# endif 1.80 + } 1.81 +# if defined(OC_ARM_ASM_NEON) 1.82 + if(_state->cpu_flags&OC_CPU_ARM_NEON){ 1.83 +# if defined(OC_STATE_USE_VTABLE) 1.84 + _state->opt_vtable.frag_copy_list=oc_frag_copy_list_neon; 1.85 + _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_neon; 1.86 + _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_neon; 1.87 + _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_neon; 1.88 + _state->opt_vtable.state_frag_recon=oc_state_frag_recon_neon; 1.89 + _state->opt_vtable.loop_filter_init=oc_loop_filter_init_neon; 1.90 + _state->opt_vtable.state_loop_filter_frag_rows= 1.91 + (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_neon; 1.92 + _state->opt_vtable.idct8x8=oc_idct8x8_neon; 1.93 +# endif 1.94 + _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_NEON; 1.95 + } 1.96 +# endif 1.97 +# endif 1.98 +# endif 1.99 +} 1.100 + 1.101 +void oc_state_frag_recon_arm(const oc_theora_state *_state,ptrdiff_t _fragi, 1.102 + int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){ 1.103 + unsigned char *dst; 1.104 + ptrdiff_t frag_buf_off; 1.105 + int ystride; 1.106 + int refi; 1.107 + /*Apply the inverse transform.*/ 1.108 + /*Special case only having a DC component.*/ 1.109 + if(_last_zzi<2){ 1.110 + ogg_uint16_t p; 1.111 + /*We round this dequant product (and not any of the others) because there's 1.112 + no iDCT rounding.*/ 1.113 + p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); 1.114 + oc_idct8x8_1_arm(_dct_coeffs+64,p); 1.115 + } 1.116 + else{ 1.117 + /*First, dequantize the DC coefficient.*/ 1.118 + _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); 1.119 + oc_idct8x8_arm(_dct_coeffs+64,_dct_coeffs,_last_zzi); 1.120 + } 1.121 + /*Fill in the target buffer.*/ 1.122 + frag_buf_off=_state->frag_buf_offs[_fragi]; 1.123 + refi=_state->frags[_fragi].refi; 1.124 + ystride=_state->ref_ystride[_pli]; 1.125 + dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off; 1.126 + if(refi==OC_FRAME_SELF)oc_frag_recon_intra_arm(dst,ystride,_dct_coeffs+64); 1.127 + else{ 1.128 + const unsigned char *ref; 1.129 + int mvoffsets[2]; 1.130 + ref=_state->ref_frame_data[refi]+frag_buf_off; 1.131 + if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, 1.132 + _state->frag_mvs[_fragi])>1){ 1.133 + oc_frag_recon_inter2_arm(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, 1.134 + _dct_coeffs+64); 1.135 + } 1.136 + else oc_frag_recon_inter_arm(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64); 1.137 + } 1.138 +} 1.139 + 1.140 +# if defined(OC_ARM_ASM_MEDIA) 1.141 +void oc_state_frag_recon_v6(const oc_theora_state *_state,ptrdiff_t _fragi, 1.142 + int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){ 1.143 + unsigned char *dst; 1.144 + ptrdiff_t frag_buf_off; 1.145 + int ystride; 1.146 + int refi; 1.147 + /*Apply the inverse transform.*/ 1.148 + /*Special case only having a DC component.*/ 1.149 + if(_last_zzi<2){ 1.150 + ogg_uint16_t p; 1.151 + /*We round this dequant product (and not any of the others) because there's 1.152 + no iDCT rounding.*/ 1.153 + p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); 1.154 + oc_idct8x8_1_v6(_dct_coeffs+64,p); 1.155 + } 1.156 + else{ 1.157 + /*First, dequantize the DC coefficient.*/ 1.158 + _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); 1.159 + oc_idct8x8_v6(_dct_coeffs+64,_dct_coeffs,_last_zzi); 1.160 + } 1.161 + /*Fill in the target buffer.*/ 1.162 + frag_buf_off=_state->frag_buf_offs[_fragi]; 1.163 + refi=_state->frags[_fragi].refi; 1.164 + ystride=_state->ref_ystride[_pli]; 1.165 + dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off; 1.166 + if(refi==OC_FRAME_SELF)oc_frag_recon_intra_v6(dst,ystride,_dct_coeffs+64); 1.167 + else{ 1.168 + const unsigned char *ref; 1.169 + int mvoffsets[2]; 1.170 + ref=_state->ref_frame_data[refi]+frag_buf_off; 1.171 + if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, 1.172 + _state->frag_mvs[_fragi])>1){ 1.173 + oc_frag_recon_inter2_v6(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, 1.174 + _dct_coeffs+64); 1.175 + } 1.176 + else oc_frag_recon_inter_v6(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64); 1.177 + } 1.178 +} 1.179 + 1.180 +# if defined(OC_ARM_ASM_NEON) 1.181 +void oc_state_frag_recon_neon(const oc_theora_state *_state,ptrdiff_t _fragi, 1.182 + int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){ 1.183 + unsigned char *dst; 1.184 + ptrdiff_t frag_buf_off; 1.185 + int ystride; 1.186 + int refi; 1.187 + /*Apply the inverse transform.*/ 1.188 + /*Special case only having a DC component.*/ 1.189 + if(_last_zzi<2){ 1.190 + ogg_uint16_t p; 1.191 + /*We round this dequant product (and not any of the others) because there's 1.192 + no iDCT rounding.*/ 1.193 + p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); 1.194 + oc_idct8x8_1_neon(_dct_coeffs+64,p); 1.195 + } 1.196 + else{ 1.197 + /*First, dequantize the DC coefficient.*/ 1.198 + _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); 1.199 + oc_idct8x8_neon(_dct_coeffs+64,_dct_coeffs,_last_zzi); 1.200 + } 1.201 + /*Fill in the target buffer.*/ 1.202 + frag_buf_off=_state->frag_buf_offs[_fragi]; 1.203 + refi=_state->frags[_fragi].refi; 1.204 + ystride=_state->ref_ystride[_pli]; 1.205 + dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off; 1.206 + if(refi==OC_FRAME_SELF)oc_frag_recon_intra_neon(dst,ystride,_dct_coeffs+64); 1.207 + else{ 1.208 + const unsigned char *ref; 1.209 + int mvoffsets[2]; 1.210 + ref=_state->ref_frame_data[refi]+frag_buf_off; 1.211 + if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, 1.212 + _state->frag_mvs[_fragi])>1){ 1.213 + oc_frag_recon_inter2_neon(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, 1.214 + _dct_coeffs+64); 1.215 + } 1.216 + else oc_frag_recon_inter_neon(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64); 1.217 + } 1.218 +} 1.219 +# endif 1.220 +# endif 1.221 + 1.222 +#endif