media/libtheora/lib/arm/armstate.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/media/libtheora/lib/arm/armstate.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,219 @@
     1.4 +/********************************************************************
     1.5 + *                                                                  *
     1.6 + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
     1.7 + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
     1.8 + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
     1.9 + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
    1.10 + *                                                                  *
    1.11 + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
    1.12 + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
    1.13 + *                                                                  *
    1.14 + ********************************************************************
    1.15 +
    1.16 +  function:
    1.17 +    last mod: $Id: x86state.c 17344 2010-07-21 01:42:18Z tterribe $
    1.18 +
    1.19 + ********************************************************************/
    1.20 +#include "armint.h"
    1.21 +
    1.22 +#if defined(OC_ARM_ASM)
    1.23 +
    1.24 +# if defined(OC_ARM_ASM_NEON)
    1.25 +/*This table has been modified from OC_FZIG_ZAG by baking an 8x8 transpose into
    1.26 +   the destination.*/
    1.27 +static const unsigned char OC_FZIG_ZAG_NEON[128]={
    1.28 +   0, 8, 1, 2, 9,16,24,17, /*Entries 0...63: destination indices, all within 0...63.*/
    1.29 +  10, 3, 4,11,18,25,32,40,
    1.30 +  33,26,19,12, 5, 6,13,20,
    1.31 +  27,34,41,48,56,49,42,35,
    1.32 +  28,21,14, 7,15,22,29,36,
    1.33 +  43,50,57,58,51,44,37,30,
    1.34 +  23,31,38,45,52,59,60,53,
    1.35 +  46,39,47,54,61,62,55,63,
    1.36 +  64,64,64,64,64,64,64,64, /*Entries 64...127: every one maps to index 64.*/
    1.37 +  64,64,64,64,64,64,64,64, /*NOTE(review): presumably a dummy slot for indices past the end of a block; confirm.*/
    1.38 +  64,64,64,64,64,64,64,64,
    1.39 +  64,64,64,64,64,64,64,64,
    1.40 +  64,64,64,64,64,64,64,64,
    1.41 +  64,64,64,64,64,64,64,64,
    1.42 +  64,64,64,64,64,64,64,64,
    1.43 +  64,64,64,64,64,64,64,64
    1.44 +};
    1.45 +# endif
    1.46 +
    1.47 +void oc_state_accel_init_arm(oc_theora_state *_state){ /*Selects ARM routines for _state from the detected CPU flags.*/
    1.48 +  oc_state_accel_init_c(_state); /*Run the C initializer first; selected entries are overridden below.*/
    1.49 +  _state->cpu_flags=oc_cpu_flags_get(); /*Stored so the feature tests below can read it.*/
    1.50 +# if defined(OC_STATE_USE_VTABLE)
    1.51 +  _state->opt_vtable.frag_copy_list=oc_frag_copy_list_arm; /*Baseline _arm entries: installed without any CPU flag test.*/
    1.52 +  _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_arm;
    1.53 +  _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_arm;
    1.54 +  _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_arm;
    1.55 +  _state->opt_vtable.idct8x8=oc_idct8x8_arm;
    1.56 +  _state->opt_vtable.state_frag_recon=oc_state_frag_recon_arm;
    1.57 +  /*Note: We _must_ set this function pointer, because the macro in armint.h
    1.58 +     calls it with different arguments, so the C version will segfault.*/
    1.59 +  _state->opt_vtable.state_loop_filter_frag_rows=
    1.60 +   (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_arm; /*Cast needed: see the argument mismatch noted above.*/
    1.61 +# endif
    1.62 +# if defined(OC_ARM_ASM_EDSP)
    1.63 +  if(_state->cpu_flags&OC_CPU_ARM_EDSP){ /*EDSP only replaces frag_copy_list.*/
    1.64 +#  if defined(OC_STATE_USE_VTABLE)
    1.65 +    _state->opt_vtable.frag_copy_list=oc_frag_copy_list_edsp;
    1.66 +#  endif
    1.67 +  }
    1.68 +#  if defined(OC_ARM_ASM_MEDIA)
    1.69 +  if(_state->cpu_flags&OC_CPU_ARM_MEDIA){ /*The _v6 versions replace the baseline entries set above.*/
    1.70 +#   if defined(OC_STATE_USE_VTABLE)
    1.71 +    _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_v6;
    1.72 +    _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_v6;
    1.73 +    _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_v6;
    1.74 +    _state->opt_vtable.idct8x8=oc_idct8x8_v6;
    1.75 +    _state->opt_vtable.state_frag_recon=oc_state_frag_recon_v6;
    1.76 +    _state->opt_vtable.loop_filter_init=oc_loop_filter_init_v6; /*Not assigned in the baseline group above.*/
    1.77 +    _state->opt_vtable.state_loop_filter_frag_rows=
    1.78 +     (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_v6;
    1.79 +#   endif
    1.80 +  }
    1.81 +#   if defined(OC_ARM_ASM_NEON)
    1.82 +  if(_state->cpu_flags&OC_CPU_ARM_NEON){ /*Tested last, so NEON overrides the EDSP and v6 choices.*/
    1.83 +#    if defined(OC_STATE_USE_VTABLE)
    1.84 +    _state->opt_vtable.frag_copy_list=oc_frag_copy_list_neon;
    1.85 +    _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_neon;
    1.86 +    _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_neon;
    1.87 +    _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_neon;
    1.88 +    _state->opt_vtable.state_frag_recon=oc_state_frag_recon_neon;
    1.89 +    _state->opt_vtable.loop_filter_init=oc_loop_filter_init_neon;
    1.90 +    _state->opt_vtable.state_loop_filter_frag_rows=
    1.91 +     (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_neon;
    1.92 +    _state->opt_vtable.idct8x8=oc_idct8x8_neon;
    1.93 +#    endif
    1.94 +    _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_NEON; /*Outside the vtable guard: set whenever NEON is detected.*/
    1.95 +  }
    1.96 +#   endif
    1.97 +#  endif
    1.98 +# endif
    1.99 +}
   1.100 +
   1.101 +void oc_state_frag_recon_arm(const oc_theora_state *_state,ptrdiff_t _fragi,
   1.102 + int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){ /*Reconstructs fragment _fragi of plane _pli with the baseline _arm routines.*/
   1.103 +  unsigned char *dst; /*Write position inside the OC_FRAME_SELF buffer.*/
   1.104 +  ptrdiff_t      frag_buf_off; /*Offset of this fragment; added to both dst and ref.*/
   1.105 +  int            ystride;
   1.106 +  int            refi; /*Reference frame index stored for this fragment.*/
   1.107 +  /*Apply the inverse transform. The result is read back from _dct_coeffs+64.*/
   1.108 +  /*Special case only having a DC component.*/
   1.109 +  if(_last_zzi<2){
   1.110 +    ogg_uint16_t p;
   1.111 +    /*We round this dequant product (and not any of the others) because there's
   1.112 +       no iDCT rounding.*/
   1.113 +    p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); /*Parses as (coeff*quant+15)>>5: + binds tighter than >>.*/
   1.114 +    oc_idct8x8_1_arm(_dct_coeffs+64,p);
   1.115 +  }
   1.116 +  else{
   1.117 +    /*First, dequantize the DC coefficient.*/
   1.118 +    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); /*The product is narrowed back to ogg_int16_t by the cast.*/
   1.119 +    oc_idct8x8_arm(_dct_coeffs+64,_dct_coeffs,_last_zzi);
   1.120 +  }
   1.121 +  /*Fill in the target buffer.*/
   1.122 +  frag_buf_off=_state->frag_buf_offs[_fragi];
   1.123 +  refi=_state->frags[_fragi].refi;
   1.124 +  ystride=_state->ref_ystride[_pli];
   1.125 +  dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
   1.126 +  if(refi==OC_FRAME_SELF)oc_frag_recon_intra_arm(dst,ystride,_dct_coeffs+64); /*Intra path: no reference block is passed.*/
   1.127 +  else{
   1.128 +    const unsigned char *ref; /*Same fragment offset, but inside reference frame refi.*/
   1.129 +    int                  mvoffsets[2];
   1.130 +    ref=_state->ref_frame_data[refi]+frag_buf_off;
   1.131 +    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
   1.132 +     _state->frag_mvs[_fragi])>1){ /*Return value above 1: both mvoffsets entries are used.*/
   1.133 +      oc_frag_recon_inter2_arm(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
   1.134 +       _dct_coeffs+64);
   1.135 +    }
   1.136 +    else oc_frag_recon_inter_arm(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64); /*Otherwise only mvoffsets[0] is used.*/
   1.137 +  }
   1.138 +}
   1.139 +
   1.140 +# if defined(OC_ARM_ASM_MEDIA)
   1.141 +void oc_state_frag_recon_v6(const oc_theora_state *_state,ptrdiff_t _fragi,
   1.142 + int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){ /*Reconstructs fragment _fragi of plane _pli with the _v6 routines.*/
   1.143 +  unsigned char *dst; /*Write position inside the OC_FRAME_SELF buffer.*/
   1.144 +  ptrdiff_t      frag_buf_off; /*Offset of this fragment; added to both dst and ref.*/
   1.145 +  int            ystride;
   1.146 +  int            refi; /*Reference frame index stored for this fragment.*/
   1.147 +  /*Apply the inverse transform. The result is read back from _dct_coeffs+64.*/
   1.148 +  /*Special case only having a DC component.*/
   1.149 +  if(_last_zzi<2){
   1.150 +    ogg_uint16_t p;
   1.151 +    /*We round this dequant product (and not any of the others) because there's
   1.152 +       no iDCT rounding.*/
   1.153 +    p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); /*Parses as (coeff*quant+15)>>5: + binds tighter than >>.*/
   1.154 +    oc_idct8x8_1_v6(_dct_coeffs+64,p);
   1.155 +  }
   1.156 +  else{
   1.157 +    /*First, dequantize the DC coefficient.*/
   1.158 +    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); /*The product is narrowed back to ogg_int16_t by the cast.*/
   1.159 +    oc_idct8x8_v6(_dct_coeffs+64,_dct_coeffs,_last_zzi);
   1.160 +  }
   1.161 +  /*Fill in the target buffer.*/
   1.162 +  frag_buf_off=_state->frag_buf_offs[_fragi];
   1.163 +  refi=_state->frags[_fragi].refi;
   1.164 +  ystride=_state->ref_ystride[_pli];
   1.165 +  dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
   1.166 +  if(refi==OC_FRAME_SELF)oc_frag_recon_intra_v6(dst,ystride,_dct_coeffs+64); /*Intra path: no reference block is passed.*/
   1.167 +  else{
   1.168 +    const unsigned char *ref; /*Same fragment offset, but inside reference frame refi.*/
   1.169 +    int                  mvoffsets[2];
   1.170 +    ref=_state->ref_frame_data[refi]+frag_buf_off;
   1.171 +    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
   1.172 +     _state->frag_mvs[_fragi])>1){ /*Return value above 1: both mvoffsets entries are used.*/
   1.173 +      oc_frag_recon_inter2_v6(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
   1.174 +       _dct_coeffs+64);
   1.175 +    }
   1.176 +    else oc_frag_recon_inter_v6(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64); /*Otherwise only mvoffsets[0] is used.*/
   1.177 +  }
   1.178 +}
   1.179 +
   1.180 +# if defined(OC_ARM_ASM_NEON)
   1.181 +void oc_state_frag_recon_neon(const oc_theora_state *_state,ptrdiff_t _fragi,
   1.182 + int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){ /*Reconstructs fragment _fragi of plane _pli with the _neon routines.*/
   1.183 +  unsigned char *dst; /*Write position inside the OC_FRAME_SELF buffer.*/
   1.184 +  ptrdiff_t      frag_buf_off; /*Offset of this fragment; added to both dst and ref.*/
   1.185 +  int            ystride;
   1.186 +  int            refi; /*Reference frame index stored for this fragment.*/
   1.187 +  /*Apply the inverse transform. The result is read back from _dct_coeffs+64.*/
   1.188 +  /*Special case only having a DC component.*/
   1.189 +  if(_last_zzi<2){
   1.190 +    ogg_uint16_t p;
   1.191 +    /*We round this dequant product (and not any of the others) because there's
   1.192 +       no iDCT rounding.*/
   1.193 +    p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); /*Parses as (coeff*quant+15)>>5: + binds tighter than >>.*/
   1.194 +    oc_idct8x8_1_neon(_dct_coeffs+64,p);
   1.195 +  }
   1.196 +  else{
   1.197 +    /*First, dequantize the DC coefficient.*/
   1.198 +    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); /*The product is narrowed back to ogg_int16_t by the cast.*/
   1.199 +    oc_idct8x8_neon(_dct_coeffs+64,_dct_coeffs,_last_zzi);
   1.200 +  }
   1.201 +  /*Fill in the target buffer.*/
   1.202 +  frag_buf_off=_state->frag_buf_offs[_fragi];
   1.203 +  refi=_state->frags[_fragi].refi;
   1.204 +  ystride=_state->ref_ystride[_pli];
   1.205 +  dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
   1.206 +  if(refi==OC_FRAME_SELF)oc_frag_recon_intra_neon(dst,ystride,_dct_coeffs+64); /*Intra path: no reference block is passed.*/
   1.207 +  else{
   1.208 +    const unsigned char *ref; /*Same fragment offset, but inside reference frame refi.*/
   1.209 +    int                  mvoffsets[2];
   1.210 +    ref=_state->ref_frame_data[refi]+frag_buf_off;
   1.211 +    if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
   1.212 +     _state->frag_mvs[_fragi])>1){ /*Return value above 1: both mvoffsets entries are used.*/
   1.213 +      oc_frag_recon_inter2_neon(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
   1.214 +       _dct_coeffs+64);
   1.215 +    }
   1.216 +    else oc_frag_recon_inter_neon(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64); /*Otherwise only mvoffsets[0] is used.*/
   1.217 +  }
   1.218 +}
   1.219 +#  endif
   1.220 +# endif
   1.221 +
   1.222 +#endif

mercurial