Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /******************************************************************** |
michael@0 | 2 | * * |
michael@0 | 3 | * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * |
michael@0 | 4 | * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * |
michael@0 | 5 | * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * |
michael@0 | 6 | * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * |
michael@0 | 7 | * * |
michael@0 | 8 | * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010 * |
michael@0 | 9 | * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * |
michael@0 | 10 | * * |
michael@0 | 11 | ******************************************************************** |
michael@0 | 12 | |
michael@0 | 13 | function: |
michael@0 | 14 | last mod: $Id: x86state.c 17344 2010-07-21 01:42:18Z tterribe $ |
michael@0 | 15 | |
michael@0 | 16 | ********************************************************************/ |
michael@0 | 17 | #include "armint.h" |
michael@0 | 18 | |
michael@0 | 19 | #if defined(OC_ARM_ASM) |
michael@0 | 20 | |
michael@0 | 21 | # if defined(OC_ARM_ASM_NEON) |
/*Zig-zag lookup table for the NEON code path.
  It is derived from OC_FZIG_ZAG, with an 8x8 transpose folded into the
   destination indices it stores.
  Entries 0...63 form a permutation of 0...63.
  Entries 64...127 all hold the value 64.
  NOTE(review): presumably that is a spare slot for out-of-range zig-zag
   indices; confirm against the users of dct_fzig_zag.*/
static const unsigned char OC_FZIG_ZAG_NEON[128]={
  /*Transposed zig-zag destinations.*/
   0, 8, 1, 2, 9,16,24,17,10, 3, 4,11,18,25,32,40,
  33,26,19,12, 5, 6,13,20,27,34,41,48,56,49,42,35,
  28,21,14, 7,15,22,29,36,43,50,57,58,51,44,37,30,
  23,31,38,45,52,59,60,53,46,39,47,54,61,62,55,63,
  /*Upper half: every entry is 64.*/
  64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,
  64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,
  64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,
  64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64
};
michael@0 | 42 | # endif |
michael@0 | 43 | |
michael@0 | 44 | void oc_state_accel_init_arm(oc_theora_state *_state){ |
michael@0 | 45 | oc_state_accel_init_c(_state); |
michael@0 | 46 | _state->cpu_flags=oc_cpu_flags_get(); |
michael@0 | 47 | # if defined(OC_STATE_USE_VTABLE) |
michael@0 | 48 | _state->opt_vtable.frag_copy_list=oc_frag_copy_list_arm; |
michael@0 | 49 | _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_arm; |
michael@0 | 50 | _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_arm; |
michael@0 | 51 | _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_arm; |
michael@0 | 52 | _state->opt_vtable.idct8x8=oc_idct8x8_arm; |
michael@0 | 53 | _state->opt_vtable.state_frag_recon=oc_state_frag_recon_arm; |
michael@0 | 54 | /*Note: We _must_ set this function pointer, because the macro in armint.h |
michael@0 | 55 | calls it with different arguments, so the C version will segfault.*/ |
michael@0 | 56 | _state->opt_vtable.state_loop_filter_frag_rows= |
michael@0 | 57 | (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_arm; |
michael@0 | 58 | # endif |
michael@0 | 59 | # if defined(OC_ARM_ASM_EDSP) |
michael@0 | 60 | if(_state->cpu_flags&OC_CPU_ARM_EDSP){ |
michael@0 | 61 | # if defined(OC_STATE_USE_VTABLE) |
michael@0 | 62 | _state->opt_vtable.frag_copy_list=oc_frag_copy_list_edsp; |
michael@0 | 63 | # endif |
michael@0 | 64 | } |
michael@0 | 65 | # if defined(OC_ARM_ASM_MEDIA) |
michael@0 | 66 | if(_state->cpu_flags&OC_CPU_ARM_MEDIA){ |
michael@0 | 67 | # if defined(OC_STATE_USE_VTABLE) |
michael@0 | 68 | _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_v6; |
michael@0 | 69 | _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_v6; |
michael@0 | 70 | _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_v6; |
michael@0 | 71 | _state->opt_vtable.idct8x8=oc_idct8x8_v6; |
michael@0 | 72 | _state->opt_vtable.state_frag_recon=oc_state_frag_recon_v6; |
michael@0 | 73 | _state->opt_vtable.loop_filter_init=oc_loop_filter_init_v6; |
michael@0 | 74 | _state->opt_vtable.state_loop_filter_frag_rows= |
michael@0 | 75 | (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_v6; |
michael@0 | 76 | # endif |
michael@0 | 77 | } |
michael@0 | 78 | # if defined(OC_ARM_ASM_NEON) |
michael@0 | 79 | if(_state->cpu_flags&OC_CPU_ARM_NEON){ |
michael@0 | 80 | # if defined(OC_STATE_USE_VTABLE) |
michael@0 | 81 | _state->opt_vtable.frag_copy_list=oc_frag_copy_list_neon; |
michael@0 | 82 | _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_neon; |
michael@0 | 83 | _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_neon; |
michael@0 | 84 | _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_neon; |
michael@0 | 85 | _state->opt_vtable.state_frag_recon=oc_state_frag_recon_neon; |
michael@0 | 86 | _state->opt_vtable.loop_filter_init=oc_loop_filter_init_neon; |
michael@0 | 87 | _state->opt_vtable.state_loop_filter_frag_rows= |
michael@0 | 88 | (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_neon; |
michael@0 | 89 | _state->opt_vtable.idct8x8=oc_idct8x8_neon; |
michael@0 | 90 | # endif |
michael@0 | 91 | _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_NEON; |
michael@0 | 92 | } |
michael@0 | 93 | # endif |
michael@0 | 94 | # endif |
michael@0 | 95 | # endif |
michael@0 | 96 | } |
michael@0 | 97 | |
michael@0 | 98 | void oc_state_frag_recon_arm(const oc_theora_state *_state,ptrdiff_t _fragi, |
michael@0 | 99 | int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){ |
michael@0 | 100 | unsigned char *dst; |
michael@0 | 101 | ptrdiff_t frag_buf_off; |
michael@0 | 102 | int ystride; |
michael@0 | 103 | int refi; |
michael@0 | 104 | /*Apply the inverse transform.*/ |
michael@0 | 105 | /*Special case only having a DC component.*/ |
michael@0 | 106 | if(_last_zzi<2){ |
michael@0 | 107 | ogg_uint16_t p; |
michael@0 | 108 | /*We round this dequant product (and not any of the others) because there's |
michael@0 | 109 | no iDCT rounding.*/ |
michael@0 | 110 | p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); |
michael@0 | 111 | oc_idct8x8_1_arm(_dct_coeffs+64,p); |
michael@0 | 112 | } |
michael@0 | 113 | else{ |
michael@0 | 114 | /*First, dequantize the DC coefficient.*/ |
michael@0 | 115 | _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); |
michael@0 | 116 | oc_idct8x8_arm(_dct_coeffs+64,_dct_coeffs,_last_zzi); |
michael@0 | 117 | } |
michael@0 | 118 | /*Fill in the target buffer.*/ |
michael@0 | 119 | frag_buf_off=_state->frag_buf_offs[_fragi]; |
michael@0 | 120 | refi=_state->frags[_fragi].refi; |
michael@0 | 121 | ystride=_state->ref_ystride[_pli]; |
michael@0 | 122 | dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off; |
michael@0 | 123 | if(refi==OC_FRAME_SELF)oc_frag_recon_intra_arm(dst,ystride,_dct_coeffs+64); |
michael@0 | 124 | else{ |
michael@0 | 125 | const unsigned char *ref; |
michael@0 | 126 | int mvoffsets[2]; |
michael@0 | 127 | ref=_state->ref_frame_data[refi]+frag_buf_off; |
michael@0 | 128 | if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, |
michael@0 | 129 | _state->frag_mvs[_fragi])>1){ |
michael@0 | 130 | oc_frag_recon_inter2_arm(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, |
michael@0 | 131 | _dct_coeffs+64); |
michael@0 | 132 | } |
michael@0 | 133 | else oc_frag_recon_inter_arm(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64); |
michael@0 | 134 | } |
michael@0 | 135 | } |
michael@0 | 136 | |
michael@0 | 137 | # if defined(OC_ARM_ASM_MEDIA) |
michael@0 | 138 | void oc_state_frag_recon_v6(const oc_theora_state *_state,ptrdiff_t _fragi, |
michael@0 | 139 | int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){ |
michael@0 | 140 | unsigned char *dst; |
michael@0 | 141 | ptrdiff_t frag_buf_off; |
michael@0 | 142 | int ystride; |
michael@0 | 143 | int refi; |
michael@0 | 144 | /*Apply the inverse transform.*/ |
michael@0 | 145 | /*Special case only having a DC component.*/ |
michael@0 | 146 | if(_last_zzi<2){ |
michael@0 | 147 | ogg_uint16_t p; |
michael@0 | 148 | /*We round this dequant product (and not any of the others) because there's |
michael@0 | 149 | no iDCT rounding.*/ |
michael@0 | 150 | p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); |
michael@0 | 151 | oc_idct8x8_1_v6(_dct_coeffs+64,p); |
michael@0 | 152 | } |
michael@0 | 153 | else{ |
michael@0 | 154 | /*First, dequantize the DC coefficient.*/ |
michael@0 | 155 | _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); |
michael@0 | 156 | oc_idct8x8_v6(_dct_coeffs+64,_dct_coeffs,_last_zzi); |
michael@0 | 157 | } |
michael@0 | 158 | /*Fill in the target buffer.*/ |
michael@0 | 159 | frag_buf_off=_state->frag_buf_offs[_fragi]; |
michael@0 | 160 | refi=_state->frags[_fragi].refi; |
michael@0 | 161 | ystride=_state->ref_ystride[_pli]; |
michael@0 | 162 | dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off; |
michael@0 | 163 | if(refi==OC_FRAME_SELF)oc_frag_recon_intra_v6(dst,ystride,_dct_coeffs+64); |
michael@0 | 164 | else{ |
michael@0 | 165 | const unsigned char *ref; |
michael@0 | 166 | int mvoffsets[2]; |
michael@0 | 167 | ref=_state->ref_frame_data[refi]+frag_buf_off; |
michael@0 | 168 | if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, |
michael@0 | 169 | _state->frag_mvs[_fragi])>1){ |
michael@0 | 170 | oc_frag_recon_inter2_v6(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, |
michael@0 | 171 | _dct_coeffs+64); |
michael@0 | 172 | } |
michael@0 | 173 | else oc_frag_recon_inter_v6(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64); |
michael@0 | 174 | } |
michael@0 | 175 | } |
michael@0 | 176 | |
michael@0 | 177 | # if defined(OC_ARM_ASM_NEON) |
michael@0 | 178 | void oc_state_frag_recon_neon(const oc_theora_state *_state,ptrdiff_t _fragi, |
michael@0 | 179 | int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){ |
michael@0 | 180 | unsigned char *dst; |
michael@0 | 181 | ptrdiff_t frag_buf_off; |
michael@0 | 182 | int ystride; |
michael@0 | 183 | int refi; |
michael@0 | 184 | /*Apply the inverse transform.*/ |
michael@0 | 185 | /*Special case only having a DC component.*/ |
michael@0 | 186 | if(_last_zzi<2){ |
michael@0 | 187 | ogg_uint16_t p; |
michael@0 | 188 | /*We round this dequant product (and not any of the others) because there's |
michael@0 | 189 | no iDCT rounding.*/ |
michael@0 | 190 | p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); |
michael@0 | 191 | oc_idct8x8_1_neon(_dct_coeffs+64,p); |
michael@0 | 192 | } |
michael@0 | 193 | else{ |
michael@0 | 194 | /*First, dequantize the DC coefficient.*/ |
michael@0 | 195 | _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); |
michael@0 | 196 | oc_idct8x8_neon(_dct_coeffs+64,_dct_coeffs,_last_zzi); |
michael@0 | 197 | } |
michael@0 | 198 | /*Fill in the target buffer.*/ |
michael@0 | 199 | frag_buf_off=_state->frag_buf_offs[_fragi]; |
michael@0 | 200 | refi=_state->frags[_fragi].refi; |
michael@0 | 201 | ystride=_state->ref_ystride[_pli]; |
michael@0 | 202 | dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off; |
michael@0 | 203 | if(refi==OC_FRAME_SELF)oc_frag_recon_intra_neon(dst,ystride,_dct_coeffs+64); |
michael@0 | 204 | else{ |
michael@0 | 205 | const unsigned char *ref; |
michael@0 | 206 | int mvoffsets[2]; |
michael@0 | 207 | ref=_state->ref_frame_data[refi]+frag_buf_off; |
michael@0 | 208 | if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, |
michael@0 | 209 | _state->frag_mvs[_fragi])>1){ |
michael@0 | 210 | oc_frag_recon_inter2_neon(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, |
michael@0 | 211 | _dct_coeffs+64); |
michael@0 | 212 | } |
michael@0 | 213 | else oc_frag_recon_inter_neon(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64); |
michael@0 | 214 | } |
michael@0 | 215 | } |
michael@0 | 216 | # endif |
michael@0 | 217 | # endif |
michael@0 | 218 | |
michael@0 | 219 | #endif |