/********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
    last mod: $Id: x86state.c 17344 2010-07-21 01:42:18Z tterribe $

 ********************************************************************/
17 #include "armint.h" |
|
18 |
|
19 #if defined(OC_ARM_ASM) |
|
20 |
|
21 # if defined(OC_ARM_ASM_NEON) |
|
/*This table has been modified from OC_FZIG_ZAG by baking an 8x8 transpose into
   the destination.
  It is installed as opt_data.dct_fzig_zag by oc_state_accel_init_arm() when
   the CPU reports NEON support.
  The first 64 entries are a permutation of 0...63.
  The remaining 64 entries all hold 64, i.e., one past the last valid index of
   an 8x8 block (presumably so that out-of-range zig-zag positions land in a
   single spare slot; confirm against the users of dct_fzig_zag).*/
static const unsigned char OC_FZIG_ZAG_NEON[128]={
   0, 8, 1, 2, 9,16,24,17,
  10, 3, 4,11,18,25,32,40,
  33,26,19,12, 5, 6,13,20,
  27,34,41,48,56,49,42,35,
  28,21,14, 7,15,22,29,36,
  43,50,57,58,51,44,37,30,
  23,31,38,45,52,59,60,53,
  46,39,47,54,61,62,55,63,
  64,64,64,64,64,64,64,64,
  64,64,64,64,64,64,64,64,
  64,64,64,64,64,64,64,64,
  64,64,64,64,64,64,64,64,
  64,64,64,64,64,64,64,64,
  64,64,64,64,64,64,64,64,
  64,64,64,64,64,64,64,64,
  64,64,64,64,64,64,64,64
};
|
42 # endif |
|
43 |
|
44 void oc_state_accel_init_arm(oc_theora_state *_state){ |
|
45 oc_state_accel_init_c(_state); |
|
46 _state->cpu_flags=oc_cpu_flags_get(); |
|
47 # if defined(OC_STATE_USE_VTABLE) |
|
48 _state->opt_vtable.frag_copy_list=oc_frag_copy_list_arm; |
|
49 _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_arm; |
|
50 _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_arm; |
|
51 _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_arm; |
|
52 _state->opt_vtable.idct8x8=oc_idct8x8_arm; |
|
53 _state->opt_vtable.state_frag_recon=oc_state_frag_recon_arm; |
|
54 /*Note: We _must_ set this function pointer, because the macro in armint.h |
|
55 calls it with different arguments, so the C version will segfault.*/ |
|
56 _state->opt_vtable.state_loop_filter_frag_rows= |
|
57 (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_arm; |
|
58 # endif |
|
59 # if defined(OC_ARM_ASM_EDSP) |
|
60 if(_state->cpu_flags&OC_CPU_ARM_EDSP){ |
|
61 # if defined(OC_STATE_USE_VTABLE) |
|
62 _state->opt_vtable.frag_copy_list=oc_frag_copy_list_edsp; |
|
63 # endif |
|
64 } |
|
65 # if defined(OC_ARM_ASM_MEDIA) |
|
66 if(_state->cpu_flags&OC_CPU_ARM_MEDIA){ |
|
67 # if defined(OC_STATE_USE_VTABLE) |
|
68 _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_v6; |
|
69 _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_v6; |
|
70 _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_v6; |
|
71 _state->opt_vtable.idct8x8=oc_idct8x8_v6; |
|
72 _state->opt_vtable.state_frag_recon=oc_state_frag_recon_v6; |
|
73 _state->opt_vtable.loop_filter_init=oc_loop_filter_init_v6; |
|
74 _state->opt_vtable.state_loop_filter_frag_rows= |
|
75 (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_v6; |
|
76 # endif |
|
77 } |
|
78 # if defined(OC_ARM_ASM_NEON) |
|
79 if(_state->cpu_flags&OC_CPU_ARM_NEON){ |
|
80 # if defined(OC_STATE_USE_VTABLE) |
|
81 _state->opt_vtable.frag_copy_list=oc_frag_copy_list_neon; |
|
82 _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_neon; |
|
83 _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_neon; |
|
84 _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_neon; |
|
85 _state->opt_vtable.state_frag_recon=oc_state_frag_recon_neon; |
|
86 _state->opt_vtable.loop_filter_init=oc_loop_filter_init_neon; |
|
87 _state->opt_vtable.state_loop_filter_frag_rows= |
|
88 (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_neon; |
|
89 _state->opt_vtable.idct8x8=oc_idct8x8_neon; |
|
90 # endif |
|
91 _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_NEON; |
|
92 } |
|
93 # endif |
|
94 # endif |
|
95 # endif |
|
96 } |
|
97 |
|
98 void oc_state_frag_recon_arm(const oc_theora_state *_state,ptrdiff_t _fragi, |
|
99 int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){ |
|
100 unsigned char *dst; |
|
101 ptrdiff_t frag_buf_off; |
|
102 int ystride; |
|
103 int refi; |
|
104 /*Apply the inverse transform.*/ |
|
105 /*Special case only having a DC component.*/ |
|
106 if(_last_zzi<2){ |
|
107 ogg_uint16_t p; |
|
108 /*We round this dequant product (and not any of the others) because there's |
|
109 no iDCT rounding.*/ |
|
110 p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); |
|
111 oc_idct8x8_1_arm(_dct_coeffs+64,p); |
|
112 } |
|
113 else{ |
|
114 /*First, dequantize the DC coefficient.*/ |
|
115 _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); |
|
116 oc_idct8x8_arm(_dct_coeffs+64,_dct_coeffs,_last_zzi); |
|
117 } |
|
118 /*Fill in the target buffer.*/ |
|
119 frag_buf_off=_state->frag_buf_offs[_fragi]; |
|
120 refi=_state->frags[_fragi].refi; |
|
121 ystride=_state->ref_ystride[_pli]; |
|
122 dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off; |
|
123 if(refi==OC_FRAME_SELF)oc_frag_recon_intra_arm(dst,ystride,_dct_coeffs+64); |
|
124 else{ |
|
125 const unsigned char *ref; |
|
126 int mvoffsets[2]; |
|
127 ref=_state->ref_frame_data[refi]+frag_buf_off; |
|
128 if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, |
|
129 _state->frag_mvs[_fragi])>1){ |
|
130 oc_frag_recon_inter2_arm(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, |
|
131 _dct_coeffs+64); |
|
132 } |
|
133 else oc_frag_recon_inter_arm(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64); |
|
134 } |
|
135 } |
|
136 |
|
137 # if defined(OC_ARM_ASM_MEDIA) |
|
138 void oc_state_frag_recon_v6(const oc_theora_state *_state,ptrdiff_t _fragi, |
|
139 int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){ |
|
140 unsigned char *dst; |
|
141 ptrdiff_t frag_buf_off; |
|
142 int ystride; |
|
143 int refi; |
|
144 /*Apply the inverse transform.*/ |
|
145 /*Special case only having a DC component.*/ |
|
146 if(_last_zzi<2){ |
|
147 ogg_uint16_t p; |
|
148 /*We round this dequant product (and not any of the others) because there's |
|
149 no iDCT rounding.*/ |
|
150 p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); |
|
151 oc_idct8x8_1_v6(_dct_coeffs+64,p); |
|
152 } |
|
153 else{ |
|
154 /*First, dequantize the DC coefficient.*/ |
|
155 _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); |
|
156 oc_idct8x8_v6(_dct_coeffs+64,_dct_coeffs,_last_zzi); |
|
157 } |
|
158 /*Fill in the target buffer.*/ |
|
159 frag_buf_off=_state->frag_buf_offs[_fragi]; |
|
160 refi=_state->frags[_fragi].refi; |
|
161 ystride=_state->ref_ystride[_pli]; |
|
162 dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off; |
|
163 if(refi==OC_FRAME_SELF)oc_frag_recon_intra_v6(dst,ystride,_dct_coeffs+64); |
|
164 else{ |
|
165 const unsigned char *ref; |
|
166 int mvoffsets[2]; |
|
167 ref=_state->ref_frame_data[refi]+frag_buf_off; |
|
168 if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, |
|
169 _state->frag_mvs[_fragi])>1){ |
|
170 oc_frag_recon_inter2_v6(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, |
|
171 _dct_coeffs+64); |
|
172 } |
|
173 else oc_frag_recon_inter_v6(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64); |
|
174 } |
|
175 } |
|
176 |
|
177 # if defined(OC_ARM_ASM_NEON) |
|
178 void oc_state_frag_recon_neon(const oc_theora_state *_state,ptrdiff_t _fragi, |
|
179 int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){ |
|
180 unsigned char *dst; |
|
181 ptrdiff_t frag_buf_off; |
|
182 int ystride; |
|
183 int refi; |
|
184 /*Apply the inverse transform.*/ |
|
185 /*Special case only having a DC component.*/ |
|
186 if(_last_zzi<2){ |
|
187 ogg_uint16_t p; |
|
188 /*We round this dequant product (and not any of the others) because there's |
|
189 no iDCT rounding.*/ |
|
190 p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5); |
|
191 oc_idct8x8_1_neon(_dct_coeffs+64,p); |
|
192 } |
|
193 else{ |
|
194 /*First, dequantize the DC coefficient.*/ |
|
195 _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant); |
|
196 oc_idct8x8_neon(_dct_coeffs+64,_dct_coeffs,_last_zzi); |
|
197 } |
|
198 /*Fill in the target buffer.*/ |
|
199 frag_buf_off=_state->frag_buf_offs[_fragi]; |
|
200 refi=_state->frags[_fragi].refi; |
|
201 ystride=_state->ref_ystride[_pli]; |
|
202 dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off; |
|
203 if(refi==OC_FRAME_SELF)oc_frag_recon_intra_neon(dst,ystride,_dct_coeffs+64); |
|
204 else{ |
|
205 const unsigned char *ref; |
|
206 int mvoffsets[2]; |
|
207 ref=_state->ref_frame_data[refi]+frag_buf_off; |
|
208 if(oc_state_get_mv_offsets(_state,mvoffsets,_pli, |
|
209 _state->frag_mvs[_fragi])>1){ |
|
210 oc_frag_recon_inter2_neon(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride, |
|
211 _dct_coeffs+64); |
|
212 } |
|
213 else oc_frag_recon_inter_neon(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64); |
|
214 } |
|
215 } |
|
216 # endif |
|
217 # endif |
|
218 |
|
219 #endif |