|
1 /******************************************************************** |
|
2 * * |
|
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * |
|
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * |
|
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * |
|
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * |
|
7 * * |
|
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * |
|
9 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * |
|
10 * * |
|
11 ******************************************************************** |
|
12 |
|
13 function: |
|
14 last mod: $Id: idct.c 17410 2010-09-21 21:53:48Z tterribe $ |
|
15 |
|
16 ********************************************************************/ |
|
17 |
|
18 #include <string.h> |
|
19 #include "internal.h" |
|
20 #include "dct.h" |
|
21 |
|
22 /*Performs an inverse 8 point Type-II DCT transform. |
|
23 The output is scaled by a factor of 2 relative to the orthonormal version of |
|
24 the transform. |
|
25 _y: The buffer to store the result in. |
|
26 Data will be placed in every 8th entry (e.g., in a column of an 8x8 |
|
27 block). |
|
28 _x: The input coefficients. |
|
29 The first 8 entries are used (e.g., from a row of an 8x8 block).*/ |
|
30 static void idct8(ogg_int16_t *_y,const ogg_int16_t _x[8]){ |
|
31 ogg_int32_t t[8]; |
|
32 ogg_int32_t r; |
|
33 /*Stage 1:*/ |
|
34 /*0-1 butterfly.*/ |
|
35 t[0]=OC_C4S4*(ogg_int16_t)(_x[0]+_x[4])>>16; |
|
36 t[1]=OC_C4S4*(ogg_int16_t)(_x[0]-_x[4])>>16; |
|
37 /*2-3 rotation by 6pi/16.*/ |
|
38 t[2]=(OC_C6S2*_x[2]>>16)-(OC_C2S6*_x[6]>>16); |
|
39 t[3]=(OC_C2S6*_x[2]>>16)+(OC_C6S2*_x[6]>>16); |
|
40 /*4-7 rotation by 7pi/16.*/ |
|
41 t[4]=(OC_C7S1*_x[1]>>16)-(OC_C1S7*_x[7]>>16); |
|
42 /*5-6 rotation by 3pi/16.*/ |
|
43 t[5]=(OC_C3S5*_x[5]>>16)-(OC_C5S3*_x[3]>>16); |
|
44 t[6]=(OC_C5S3*_x[5]>>16)+(OC_C3S5*_x[3]>>16); |
|
45 t[7]=(OC_C1S7*_x[1]>>16)+(OC_C7S1*_x[7]>>16); |
|
46 /*Stage 2:*/ |
|
47 /*4-5 butterfly.*/ |
|
48 r=t[4]+t[5]; |
|
49 t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16; |
|
50 t[4]=r; |
|
51 /*7-6 butterfly.*/ |
|
52 r=t[7]+t[6]; |
|
53 t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16; |
|
54 t[7]=r; |
|
55 /*Stage 3:*/ |
|
56 /*0-3 butterfly.*/ |
|
57 r=t[0]+t[3]; |
|
58 t[3]=t[0]-t[3]; |
|
59 t[0]=r; |
|
60 /*1-2 butterfly.*/ |
|
61 r=t[1]+t[2]; |
|
62 t[2]=t[1]-t[2]; |
|
63 t[1]=r; |
|
64 /*6-5 butterfly.*/ |
|
65 r=t[6]+t[5]; |
|
66 t[5]=t[6]-t[5]; |
|
67 t[6]=r; |
|
68 /*Stage 4:*/ |
|
69 /*0-7 butterfly.*/ |
|
70 _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); |
|
71 /*1-6 butterfly.*/ |
|
72 _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); |
|
73 /*2-5 butterfly.*/ |
|
74 _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); |
|
75 /*3-4 butterfly.*/ |
|
76 _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); |
|
77 _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); |
|
78 _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); |
|
79 _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); |
|
80 _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); |
|
81 } |
|
82 |
|
83 /*Performs an inverse 8 point Type-II DCT transform. |
|
84 The output is scaled by a factor of 2 relative to the orthonormal version of |
|
85 the transform. |
|
86 _y: The buffer to store the result in. |
|
87 Data will be placed in every 8th entry (e.g., in a column of an 8x8 |
|
88 block). |
|
89 _x: The input coefficients. |
|
90 Only the first 4 entries are used. |
|
91 The other 4 are assumed to be 0.*/ |
|
92 static void idct8_4(ogg_int16_t *_y,const ogg_int16_t _x[8]){ |
|
93 ogg_int32_t t[8]; |
|
94 ogg_int32_t r; |
|
95 /*Stage 1:*/ |
|
96 t[0]=OC_C4S4*_x[0]>>16; |
|
97 t[2]=OC_C6S2*_x[2]>>16; |
|
98 t[3]=OC_C2S6*_x[2]>>16; |
|
99 t[4]=OC_C7S1*_x[1]>>16; |
|
100 t[5]=-(OC_C5S3*_x[3]>>16); |
|
101 t[6]=OC_C3S5*_x[3]>>16; |
|
102 t[7]=OC_C1S7*_x[1]>>16; |
|
103 /*Stage 2:*/ |
|
104 r=t[4]+t[5]; |
|
105 t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16; |
|
106 t[4]=r; |
|
107 r=t[7]+t[6]; |
|
108 t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16; |
|
109 t[7]=r; |
|
110 /*Stage 3:*/ |
|
111 t[1]=t[0]+t[2]; |
|
112 t[2]=t[0]-t[2]; |
|
113 r=t[0]+t[3]; |
|
114 t[3]=t[0]-t[3]; |
|
115 t[0]=r; |
|
116 r=t[6]+t[5]; |
|
117 t[5]=t[6]-t[5]; |
|
118 t[6]=r; |
|
119 /*Stage 4:*/ |
|
120 _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); |
|
121 _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); |
|
122 _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); |
|
123 _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); |
|
124 _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); |
|
125 _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); |
|
126 _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); |
|
127 _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); |
|
128 } |
|
129 |
|
130 /*Performs an inverse 8 point Type-II DCT transform. |
|
131 The output is scaled by a factor of 2 relative to the orthonormal version of |
|
132 the transform. |
|
133 _y: The buffer to store the result in. |
|
134 Data will be placed in every 8th entry (e.g., in a column of an 8x8 |
|
135 block). |
|
136 _x: The input coefficients. |
|
137 Only the first 3 entries are used. |
|
138 The other 5 are assumed to be 0.*/ |
|
139 static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){ |
|
140 ogg_int32_t t[8]; |
|
141 ogg_int32_t r; |
|
142 /*Stage 1:*/ |
|
143 t[0]=OC_C4S4*_x[0]>>16; |
|
144 t[2]=OC_C6S2*_x[2]>>16; |
|
145 t[3]=OC_C2S6*_x[2]>>16; |
|
146 t[4]=OC_C7S1*_x[1]>>16; |
|
147 t[7]=OC_C1S7*_x[1]>>16; |
|
148 /*Stage 2:*/ |
|
149 t[5]=OC_C4S4*t[4]>>16; |
|
150 t[6]=OC_C4S4*t[7]>>16; |
|
151 /*Stage 3:*/ |
|
152 t[1]=t[0]+t[2]; |
|
153 t[2]=t[0]-t[2]; |
|
154 r=t[0]+t[3]; |
|
155 t[3]=t[0]-t[3]; |
|
156 t[0]=r; |
|
157 r=t[6]+t[5]; |
|
158 t[5]=t[6]-t[5]; |
|
159 t[6]=r; |
|
160 /*Stage 4:*/ |
|
161 _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); |
|
162 _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); |
|
163 _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); |
|
164 _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); |
|
165 _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); |
|
166 _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); |
|
167 _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); |
|
168 _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); |
|
169 } |
|
170 |
|
171 /*Performs an inverse 8 point Type-II DCT transform. |
|
172 The output is scaled by a factor of 2 relative to the orthonormal version of |
|
173 the transform. |
|
174 _y: The buffer to store the result in. |
|
175 Data will be placed in every 8th entry (e.g., in a column of an 8x8 |
|
176 block). |
|
177 _x: The input coefficients. |
|
178 Only the first 2 entries are used. |
|
179 The other 6 are assumed to be 0.*/ |
|
180 static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){ |
|
181 ogg_int32_t t[8]; |
|
182 ogg_int32_t r; |
|
183 /*Stage 1:*/ |
|
184 t[0]=OC_C4S4*_x[0]>>16; |
|
185 t[4]=OC_C7S1*_x[1]>>16; |
|
186 t[7]=OC_C1S7*_x[1]>>16; |
|
187 /*Stage 2:*/ |
|
188 t[5]=OC_C4S4*t[4]>>16; |
|
189 t[6]=OC_C4S4*t[7]>>16; |
|
190 /*Stage 3:*/ |
|
191 r=t[6]+t[5]; |
|
192 t[5]=t[6]-t[5]; |
|
193 t[6]=r; |
|
194 /*Stage 4:*/ |
|
195 _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); |
|
196 _y[1<<3]=(ogg_int16_t)(t[0]+t[6]); |
|
197 _y[2<<3]=(ogg_int16_t)(t[0]+t[5]); |
|
198 _y[3<<3]=(ogg_int16_t)(t[0]+t[4]); |
|
199 _y[4<<3]=(ogg_int16_t)(t[0]-t[4]); |
|
200 _y[5<<3]=(ogg_int16_t)(t[0]-t[5]); |
|
201 _y[6<<3]=(ogg_int16_t)(t[0]-t[6]); |
|
202 _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); |
|
203 } |
|
204 |
|
205 /*Performs an inverse 8 point Type-II DCT transform. |
|
206 The output is scaled by a factor of 2 relative to the orthonormal version of |
|
207 the transform. |
|
208 _y: The buffer to store the result in. |
|
209 Data will be placed in every 8th entry (e.g., in a column of an 8x8 |
|
210 block). |
|
211 _x: The input coefficients. |
|
212 Only the first entry is used. |
|
213 The other 7 are assumed to be 0.*/ |
|
214 static void idct8_1(ogg_int16_t *_y,const ogg_int16_t _x[1]){ |
|
215 _y[0<<3]=_y[1<<3]=_y[2<<3]=_y[3<<3]= |
|
216 _y[4<<3]=_y[5<<3]=_y[6<<3]=_y[7<<3]=(ogg_int16_t)(OC_C4S4*_x[0]>>16); |
|
217 } |
|
218 |
|
219 /*Performs an inverse 8x8 Type-II DCT transform. |
|
220 The input is assumed to be scaled by a factor of 4 relative to orthonormal |
|
221 version of the transform. |
|
222 All coefficients but the first 3 in zig-zag scan order are assumed to be 0: |
|
223 x x 0 0 0 0 0 0 |
|
224 x 0 0 0 0 0 0 0 |
|
225 0 0 0 0 0 0 0 0 |
|
226 0 0 0 0 0 0 0 0 |
|
227 0 0 0 0 0 0 0 0 |
|
228 0 0 0 0 0 0 0 0 |
|
229 0 0 0 0 0 0 0 0 |
|
230 0 0 0 0 0 0 0 0 |
|
231 _y: The buffer to store the result in. |
|
232 This may be the same as _x. |
|
233 _x: The input coefficients.*/ |
|
234 static void oc_idct8x8_3(ogg_int16_t _y[64],ogg_int16_t _x[64]){ |
|
235 ogg_int16_t w[64]; |
|
236 int i; |
|
237 /*Transform rows of x into columns of w.*/ |
|
238 idct8_2(w,_x); |
|
239 idct8_1(w+1,_x+8); |
|
240 /*Transform rows of w into columns of y.*/ |
|
241 for(i=0;i<8;i++)idct8_2(_y+i,w+i*8); |
|
242 /*Adjust for the scale factor.*/ |
|
243 for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4); |
|
244 /*Clear input data for next block (decoder only).*/ |
|
245 if(_x!=_y)_x[0]=_x[1]=_x[8]=0; |
|
246 } |
|
247 |
|
248 /*Performs an inverse 8x8 Type-II DCT transform. |
|
249 The input is assumed to be scaled by a factor of 4 relative to orthonormal |
|
250 version of the transform. |
|
251 All coefficients but the first 10 in zig-zag scan order are assumed to be 0: |
|
252 x x x x 0 0 0 0 |
|
253 x x x 0 0 0 0 0 |
|
254 x x 0 0 0 0 0 0 |
|
255 x 0 0 0 0 0 0 0 |
|
256 0 0 0 0 0 0 0 0 |
|
257 0 0 0 0 0 0 0 0 |
|
258 0 0 0 0 0 0 0 0 |
|
259 0 0 0 0 0 0 0 0 |
|
260 _y: The buffer to store the result in. |
|
261 This may be the same as _x. |
|
262 _x: The input coefficients.*/ |
|
263 static void oc_idct8x8_10(ogg_int16_t _y[64],ogg_int16_t _x[64]){ |
|
264 ogg_int16_t w[64]; |
|
265 int i; |
|
266 /*Transform rows of x into columns of w.*/ |
|
267 idct8_4(w,_x); |
|
268 idct8_3(w+1,_x+8); |
|
269 idct8_2(w+2,_x+16); |
|
270 idct8_1(w+3,_x+24); |
|
271 /*Transform rows of w into columns of y.*/ |
|
272 for(i=0;i<8;i++)idct8_4(_y+i,w+i*8); |
|
273 /*Adjust for the scale factor.*/ |
|
274 for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4); |
|
275 /*Clear input data for next block (decoder only).*/ |
|
276 if(_x!=_y)_x[0]=_x[1]=_x[2]=_x[3]=_x[8]=_x[9]=_x[10]=_x[16]=_x[17]=_x[24]=0; |
|
277 } |
|
278 |
|
279 /*Performs an inverse 8x8 Type-II DCT transform. |
|
280 The input is assumed to be scaled by a factor of 4 relative to orthonormal |
|
281 version of the transform. |
|
282 _y: The buffer to store the result in. |
|
283 This may be the same as _x. |
|
284 _x: The input coefficients.*/ |
|
285 static void oc_idct8x8_slow(ogg_int16_t _y[64],ogg_int16_t _x[64]){ |
|
286 ogg_int16_t w[64]; |
|
287 int i; |
|
288 /*Transform rows of x into columns of w.*/ |
|
289 for(i=0;i<8;i++)idct8(w+i,_x+i*8); |
|
290 /*Transform rows of w into columns of y.*/ |
|
291 for(i=0;i<8;i++)idct8(_y+i,w+i*8); |
|
292 /*Adjust for the scale factor.*/ |
|
293 for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4); |
|
294 if(_x!=_y)for(i=0;i<64;i++)_x[i]=0; |
|
295 } |
|
296 |
|
297 /*Performs an inverse 8x8 Type-II DCT transform. |
|
298 The input is assumed to be scaled by a factor of 4 relative to orthonormal |
|
299 version of the transform.*/ |
|
300 void oc_idct8x8_c(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi){ |
|
301 /*_last_zzi is subtly different from an actual count of the number of |
|
302 coefficients we decoded for this block. |
|
303 It contains the value of zzi BEFORE the final token in the block was |
|
304 decoded. |
|
305 In most cases this is an EOB token (the continuation of an EOB run from a |
|
306 previous block counts), and so this is the same as the coefficient count. |
|
307 However, in the case that the last token was NOT an EOB token, but filled |
|
308 the block up with exactly 64 coefficients, _last_zzi will be less than 64. |
|
309 Provided the last token was not a pure zero run, the minimum value it can |
|
310 be is 46, and so that doesn't affect any of the cases in this routine. |
|
311 However, if the last token WAS a pure zero run of length 63, then _last_zzi |
|
312 will be 1 while the number of coefficients decoded is 64. |
|
313 Thus, we will trigger the following special case, where the real |
|
314 coefficient count would not. |
|
315 Note also that a zero run of length 64 will give _last_zzi a value of 0, |
|
316 but we still process the DC coefficient, which might have a non-zero value |
|
317 due to DC prediction. |
|
318 Although convoluted, this is arguably the correct behavior: it allows us to |
|
319 use a smaller transform when the block ends with a long zero run instead |
|
320 of a normal EOB token. |
|
321 It could be smarter... multiple separate zero runs at the end of a block |
|
322 will fool it, but an encoder that generates these really deserves what it |
|
323 gets. |
|
324 Needless to say we inherited this approach from VP3.*/ |
|
325 /*Then perform the iDCT.*/ |
|
326 if(_last_zzi<=3)oc_idct8x8_3(_y,_x); |
|
327 else if(_last_zzi<=10)oc_idct8x8_10(_y,_x); |
|
328 else oc_idct8x8_slow(_y,_x); |
|
329 } |