1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libtheora/lib/idct.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,329 @@ 1.4 +/******************************************************************** 1.5 + * * 1.6 + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * 1.7 + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * 1.8 + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * 1.9 + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * 1.10 + * * 1.11 + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * 1.12 + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * 1.13 + * * 1.14 + ******************************************************************** 1.15 + 1.16 + function: 1.17 + last mod: $Id: idct.c 17410 2010-09-21 21:53:48Z tterribe $ 1.18 + 1.19 + ********************************************************************/ 1.20 + 1.21 +#include <string.h> 1.22 +#include "internal.h" 1.23 +#include "dct.h" 1.24 + 1.25 +/*Performs an inverse 8 point Type-II DCT transform. 1.26 + The output is scaled by a factor of 2 relative to the orthonormal version of 1.27 + the transform. 1.28 + _y: The buffer to store the result in. 1.29 + Data will be placed in every 8th entry (e.g., in a column of an 8x8 1.30 + block). 1.31 + _x: The input coefficients. 1.32 + The first 8 entries are used (e.g., from a row of an 8x8 block).*/ 1.33 +static void idct8(ogg_int16_t *_y,const ogg_int16_t _x[8]){ 1.34 + ogg_int32_t t[8]; 1.35 + ogg_int32_t r; 1.36 + /*Stage 1:*/ 1.37 + /*0-1 butterfly.*/ 1.38 + t[0]=OC_C4S4*(ogg_int16_t)(_x[0]+_x[4])>>16; 1.39 + t[1]=OC_C4S4*(ogg_int16_t)(_x[0]-_x[4])>>16; 1.40 + /*2-3 rotation by 6pi/16.*/ 1.41 + t[2]=(OC_C6S2*_x[2]>>16)-(OC_C2S6*_x[6]>>16); 1.42 + t[3]=(OC_C2S6*_x[2]>>16)+(OC_C6S2*_x[6]>>16); 1.43 + /*4-7 rotation by 7pi/16.*/ 1.44 + t[4]=(OC_C7S1*_x[1]>>16)-(OC_C1S7*_x[7]>>16); 1.45 + /*5-6 rotation by 3pi/16.*/ 1.46 + t[5]=(OC_C3S5*_x[5]>>16)-(OC_C5S3*_x[3]>>16); 1.47 + t[6]=(OC_C5S3*_x[5]>>16)+(OC_C3S5*_x[3]>>16); 1.48 + t[7]=(OC_C1S7*_x[1]>>16)+(OC_C7S1*_x[7]>>16); 1.49 + /*Stage 2:*/ 1.50 + /*4-5 butterfly.*/ 1.51 + r=t[4]+t[5]; 1.52 + t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16; 1.53 + t[4]=r; 1.54 + /*7-6 butterfly.*/ 1.55 + r=t[7]+t[6]; 1.56 + t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16; 1.57 + t[7]=r; 1.58 + /*Stage 3:*/ 1.59 + /*0-3 butterfly.*/ 1.60 + r=t[0]+t[3]; 1.61 + t[3]=t[0]-t[3]; 1.62 + t[0]=r; 1.63 + /*1-2 butterfly.*/ 1.64 + r=t[1]+t[2]; 1.65 + t[2]=t[1]-t[2]; 1.66 + t[1]=r; 1.67 + /*6-5 butterfly.*/ 1.68 + r=t[6]+t[5]; 1.69 + t[5]=t[6]-t[5]; 1.70 + t[6]=r; 1.71 + /*Stage 4:*/ 1.72 + /*0-7 butterfly.*/ 1.73 + _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); 1.74 + /*1-6 butterfly.*/ 1.75 + _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); 1.76 + /*2-5 butterfly.*/ 1.77 + _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); 1.78 + /*3-4 butterfly.*/ 1.79 + _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); 1.80 + _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); 1.81 + _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); 1.82 + _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); 1.83 + _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); 1.84 +} 1.85 + 1.86 +/*Performs an inverse 8 point Type-II DCT transform. 1.87 + The output is scaled by a factor of 2 relative to the orthonormal version of 1.88 + the transform. 1.89 + _y: The buffer to store the result in. 1.90 + Data will be placed in every 8th entry (e.g., in a column of an 8x8 1.91 + block). 1.92 + _x: The input coefficients. 1.93 + Only the first 4 entries are used. 1.94 + The other 4 are assumed to be 0.*/ 1.95 +static void idct8_4(ogg_int16_t *_y,const ogg_int16_t _x[8]){ 1.96 + ogg_int32_t t[8]; 1.97 + ogg_int32_t r; 1.98 + /*Stage 1:*/ 1.99 + t[0]=OC_C4S4*_x[0]>>16; 1.100 + t[2]=OC_C6S2*_x[2]>>16; 1.101 + t[3]=OC_C2S6*_x[2]>>16; 1.102 + t[4]=OC_C7S1*_x[1]>>16; 1.103 + t[5]=-(OC_C5S3*_x[3]>>16); 1.104 + t[6]=OC_C3S5*_x[3]>>16; 1.105 + t[7]=OC_C1S7*_x[1]>>16; 1.106 + /*Stage 2:*/ 1.107 + r=t[4]+t[5]; 1.108 + t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16; 1.109 + t[4]=r; 1.110 + r=t[7]+t[6]; 1.111 + t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16; 1.112 + t[7]=r; 1.113 + /*Stage 3:*/ 1.114 + t[1]=t[0]+t[2]; 1.115 + t[2]=t[0]-t[2]; 1.116 + r=t[0]+t[3]; 1.117 + t[3]=t[0]-t[3]; 1.118 + t[0]=r; 1.119 + r=t[6]+t[5]; 1.120 + t[5]=t[6]-t[5]; 1.121 + t[6]=r; 1.122 + /*Stage 4:*/ 1.123 + _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); 1.124 + _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); 1.125 + _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); 1.126 + _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); 1.127 + _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); 1.128 + _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); 1.129 + _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); 1.130 + _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); 1.131 +} 1.132 + 1.133 +/*Performs an inverse 8 point Type-II DCT transform. 1.134 + The output is scaled by a factor of 2 relative to the orthonormal version of 1.135 + the transform. 1.136 + _y: The buffer to store the result in. 1.137 + Data will be placed in every 8th entry (e.g., in a column of an 8x8 1.138 + block). 1.139 + _x: The input coefficients. 1.140 + Only the first 3 entries are used. 1.141 + The other 5 are assumed to be 0.*/ 1.142 +static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){ 1.143 + ogg_int32_t t[8]; 1.144 + ogg_int32_t r; 1.145 + /*Stage 1:*/ 1.146 + t[0]=OC_C4S4*_x[0]>>16; 1.147 + t[2]=OC_C6S2*_x[2]>>16; 1.148 + t[3]=OC_C2S6*_x[2]>>16; 1.149 + t[4]=OC_C7S1*_x[1]>>16; 1.150 + t[7]=OC_C1S7*_x[1]>>16; 1.151 + /*Stage 2:*/ 1.152 + t[5]=OC_C4S4*t[4]>>16; 1.153 + t[6]=OC_C4S4*t[7]>>16; 1.154 + /*Stage 3:*/ 1.155 + t[1]=t[0]+t[2]; 1.156 + t[2]=t[0]-t[2]; 1.157 + r=t[0]+t[3]; 1.158 + t[3]=t[0]-t[3]; 1.159 + t[0]=r; 1.160 + r=t[6]+t[5]; 1.161 + t[5]=t[6]-t[5]; 1.162 + t[6]=r; 1.163 + /*Stage 4:*/ 1.164 + _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); 1.165 + _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); 1.166 + _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); 1.167 + _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); 1.168 + _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); 1.169 + _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); 1.170 + _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); 1.171 + _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); 1.172 +} 1.173 + 1.174 +/*Performs an inverse 8 point Type-II DCT transform. 1.175 + The output is scaled by a factor of 2 relative to the orthonormal version of 1.176 + the transform. 1.177 + _y: The buffer to store the result in. 1.178 + Data will be placed in every 8th entry (e.g., in a column of an 8x8 1.179 + block). 1.180 + _x: The input coefficients. 1.181 + Only the first 2 entries are used. 1.182 + The other 6 are assumed to be 0.*/ 1.183 +static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){ 1.184 + ogg_int32_t t[8]; 1.185 + ogg_int32_t r; 1.186 + /*Stage 1:*/ 1.187 + t[0]=OC_C4S4*_x[0]>>16; 1.188 + t[4]=OC_C7S1*_x[1]>>16; 1.189 + t[7]=OC_C1S7*_x[1]>>16; 1.190 + /*Stage 2:*/ 1.191 + t[5]=OC_C4S4*t[4]>>16; 1.192 + t[6]=OC_C4S4*t[7]>>16; 1.193 + /*Stage 3:*/ 1.194 + r=t[6]+t[5]; 1.195 + t[5]=t[6]-t[5]; 1.196 + t[6]=r; 1.197 + /*Stage 4:*/ 1.198 + _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); 1.199 + _y[1<<3]=(ogg_int16_t)(t[0]+t[6]); 1.200 + _y[2<<3]=(ogg_int16_t)(t[0]+t[5]); 1.201 + _y[3<<3]=(ogg_int16_t)(t[0]+t[4]); 1.202 + _y[4<<3]=(ogg_int16_t)(t[0]-t[4]); 1.203 + _y[5<<3]=(ogg_int16_t)(t[0]-t[5]); 1.204 + _y[6<<3]=(ogg_int16_t)(t[0]-t[6]); 1.205 + _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); 1.206 +} 1.207 + 1.208 +/*Performs an inverse 8 point Type-II DCT transform. 1.209 + The output is scaled by a factor of 2 relative to the orthonormal version of 1.210 + the transform. 1.211 + _y: The buffer to store the result in. 1.212 + Data will be placed in every 8th entry (e.g., in a column of an 8x8 1.213 + block). 1.214 + _x: The input coefficients. 1.215 + Only the first entry is used. 1.216 + The other 7 are assumed to be 0.*/ 1.217 +static void idct8_1(ogg_int16_t *_y,const ogg_int16_t _x[1]){ 1.218 + _y[0<<3]=_y[1<<3]=_y[2<<3]=_y[3<<3]= 1.219 + _y[4<<3]=_y[5<<3]=_y[6<<3]=_y[7<<3]=(ogg_int16_t)(OC_C4S4*_x[0]>>16); 1.220 +} 1.221 + 1.222 +/*Performs an inverse 8x8 Type-II DCT transform. 1.223 + The input is assumed to be scaled by a factor of 4 relative to orthonormal 1.224 + version of the transform. 1.225 + All coefficients but the first 3 in zig-zag scan order are assumed to be 0: 1.226 + x x 0 0 0 0 0 0 1.227 + x 0 0 0 0 0 0 0 1.228 + 0 0 0 0 0 0 0 0 1.229 + 0 0 0 0 0 0 0 0 1.230 + 0 0 0 0 0 0 0 0 1.231 + 0 0 0 0 0 0 0 0 1.232 + 0 0 0 0 0 0 0 0 1.233 + 0 0 0 0 0 0 0 0 1.234 + _y: The buffer to store the result in. 1.235 + This may be the same as _x. 1.236 + _x: The input coefficients.*/ 1.237 +static void oc_idct8x8_3(ogg_int16_t _y[64],ogg_int16_t _x[64]){ 1.238 + ogg_int16_t w[64]; 1.239 + int i; 1.240 + /*Transform rows of x into columns of w.*/ 1.241 + idct8_2(w,_x); 1.242 + idct8_1(w+1,_x+8); 1.243 + /*Transform rows of w into columns of y.*/ 1.244 + for(i=0;i<8;i++)idct8_2(_y+i,w+i*8); 1.245 + /*Adjust for the scale factor.*/ 1.246 + for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4); 1.247 + /*Clear input data for next block (decoder only).*/ 1.248 + if(_x!=_y)_x[0]=_x[1]=_x[8]=0; 1.249 +} 1.250 + 1.251 +/*Performs an inverse 8x8 Type-II DCT transform. 1.252 + The input is assumed to be scaled by a factor of 4 relative to orthonormal 1.253 + version of the transform. 1.254 + All coefficients but the first 10 in zig-zag scan order are assumed to be 0: 1.255 + x x x x 0 0 0 0 1.256 + x x x 0 0 0 0 0 1.257 + x x 0 0 0 0 0 0 1.258 + x 0 0 0 0 0 0 0 1.259 + 0 0 0 0 0 0 0 0 1.260 + 0 0 0 0 0 0 0 0 1.261 + 0 0 0 0 0 0 0 0 1.262 + 0 0 0 0 0 0 0 0 1.263 + _y: The buffer to store the result in. 1.264 + This may be the same as _x. 1.265 + _x: The input coefficients.*/ 1.266 +static void oc_idct8x8_10(ogg_int16_t _y[64],ogg_int16_t _x[64]){ 1.267 + ogg_int16_t w[64]; 1.268 + int i; 1.269 + /*Transform rows of x into columns of w.*/ 1.270 + idct8_4(w,_x); 1.271 + idct8_3(w+1,_x+8); 1.272 + idct8_2(w+2,_x+16); 1.273 + idct8_1(w+3,_x+24); 1.274 + /*Transform rows of w into columns of y.*/ 1.275 + for(i=0;i<8;i++)idct8_4(_y+i,w+i*8); 1.276 + /*Adjust for the scale factor.*/ 1.277 + for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4); 1.278 + /*Clear input data for next block (decoder only).*/ 1.279 + if(_x!=_y)_x[0]=_x[1]=_x[2]=_x[3]=_x[8]=_x[9]=_x[10]=_x[16]=_x[17]=_x[24]=0; 1.280 +} 1.281 + 1.282 +/*Performs an inverse 8x8 Type-II DCT transform. 1.283 + The input is assumed to be scaled by a factor of 4 relative to orthonormal 1.284 + version of the transform. 1.285 + _y: The buffer to store the result in. 1.286 + This may be the same as _x. 1.287 + _x: The input coefficients.*/ 1.288 +static void oc_idct8x8_slow(ogg_int16_t _y[64],ogg_int16_t _x[64]){ 1.289 + ogg_int16_t w[64]; 1.290 + int i; 1.291 + /*Transform rows of x into columns of w.*/ 1.292 + for(i=0;i<8;i++)idct8(w+i,_x+i*8); 1.293 + /*Transform rows of w into columns of y.*/ 1.294 + for(i=0;i<8;i++)idct8(_y+i,w+i*8); 1.295 + /*Adjust for the scale factor.*/ 1.296 + for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4); 1.297 + if(_x!=_y)for(i=0;i<64;i++)_x[i]=0; 1.298 +} 1.299 + 1.300 +/*Performs an inverse 8x8 Type-II DCT transform. 1.301 + The input is assumed to be scaled by a factor of 4 relative to orthonormal 1.302 + version of the transform.*/ 1.303 +void oc_idct8x8_c(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi){ 1.304 + /*_last_zzi is subtly different from an actual count of the number of 1.305 + coefficients we decoded for this block. 1.306 + It contains the value of zzi BEFORE the final token in the block was 1.307 + decoded. 1.308 + In most cases this is an EOB token (the continuation of an EOB run from a 1.309 + previous block counts), and so this is the same as the coefficient count. 1.310 + However, in the case that the last token was NOT an EOB token, but filled 1.311 + the block up with exactly 64 coefficients, _last_zzi will be less than 64. 1.312 + Provided the last token was not a pure zero run, the minimum value it can 1.313 + be is 46, and so that doesn't affect any of the cases in this routine. 1.314 + However, if the last token WAS a pure zero run of length 63, then _last_zzi 1.315 + will be 1 while the number of coefficients decoded is 64. 1.316 + Thus, we will trigger the following special case, where the real 1.317 + coefficient count would not. 1.318 + Note also that a zero run of length 64 will give _last_zzi a value of 0, 1.319 + but we still process the DC coefficient, which might have a non-zero value 1.320 + due to DC prediction. 1.321 + Although convoluted, this is arguably the correct behavior: it allows us to 1.322 + use a smaller transform when the block ends with a long zero run instead 1.323 + of a normal EOB token. 1.324 + It could be smarter... multiple separate zero runs at the end of a block 1.325 + will fool it, but an encoder that generates these really deserves what it 1.326 + gets. 1.327 + Needless to say we inherited this approach from VP3.*/ 1.328 + /*Then perform the iDCT.*/ 1.329 + if(_last_zzi<=3)oc_idct8x8_3(_y,_x); 1.330 + else if(_last_zzi<=10)oc_idct8x8_10(_y,_x); 1.331 + else oc_idct8x8_slow(_y,_x); 1.332 +}