media/libtheora/lib/idct.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/media/libtheora/lib/idct.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,329 @@
     1.4 +/********************************************************************
     1.5 + *                                                                  *
     1.6 + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
     1.7 + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
     1.8 + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
     1.9 + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
    1.10 + *                                                                  *
    1.11 + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
    1.12 + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
    1.13 + *                                                                  *
    1.14 + ********************************************************************
    1.15 +
    1.16 +  function:
    1.17 +    last mod: $Id: idct.c 17410 2010-09-21 21:53:48Z tterribe $
    1.18 +
    1.19 + ********************************************************************/
    1.20 +
    1.21 +#include <string.h>
    1.22 +#include "internal.h"
    1.23 +#include "dct.h"
    1.24 +
    1.25 +/*Performs an inverse 8 point Type-II DCT transform using integer multiplies
    1.26 +   and right shifts.
    1.27 +  The output is scaled by a factor of 2 relative to the orthonormal version of
    1.28 +   the transform.
    1.29 +  _y: The buffer to store the result in.
    1.30 +      Output k is stored at _y[k<<3] (e.g., down a column of an 8x8 block).
    1.31 +  _x: The input coefficients.
    1.32 +      The first 8 entries are used (e.g., from a row of an 8x8 block).*/
    1.33 +static void idct8(ogg_int16_t *_y,const ogg_int16_t _x[8]){
    1.34 +  ogg_int32_t t[8];
    1.35 +  ogg_int32_t r;
    1.36 +  /*Stage 1: each product is shifted right by 16 right after the multiply.*/
    1.37 +  /*0-1 butterfly (the sum/difference is cast to 16 bits before scaling).*/
    1.38 +  t[0]=OC_C4S4*(ogg_int16_t)(_x[0]+_x[4])>>16;
    1.39 +  t[1]=OC_C4S4*(ogg_int16_t)(_x[0]-_x[4])>>16;
    1.40 +  /*2-3 rotation by 6pi/16.*/
    1.41 +  t[2]=(OC_C6S2*_x[2]>>16)-(OC_C2S6*_x[6]>>16);
    1.42 +  t[3]=(OC_C2S6*_x[2]>>16)+(OC_C6S2*_x[6]>>16);
    1.43 +  /*4-7 rotation by 7pi/16 (t[7] is filled in after the 5-6 rotation).*/
    1.44 +  t[4]=(OC_C7S1*_x[1]>>16)-(OC_C1S7*_x[7]>>16);
    1.45 +  /*5-6 rotation by 3pi/16.*/
    1.46 +  t[5]=(OC_C3S5*_x[5]>>16)-(OC_C5S3*_x[3]>>16);
    1.47 +  t[6]=(OC_C5S3*_x[5]>>16)+(OC_C3S5*_x[3]>>16);
    1.48 +  t[7]=(OC_C1S7*_x[1]>>16)+(OC_C7S1*_x[7]>>16);
    1.49 +  /*Stage 2: only the difference of each pair is rescaled by OC_C4S4.*/
    1.50 +  /*4-5 butterfly (r keeps the sum until t[4] has been used once more).*/
    1.51 +  r=t[4]+t[5];
    1.52 +  t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16;
    1.53 +  t[4]=r;
    1.54 +  /*7-6 butterfly (same pattern as the 4-5 butterfly).*/
    1.55 +  r=t[7]+t[6];
    1.56 +  t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16;
    1.57 +  t[7]=r;
    1.58 +  /*Stage 3: plain sums and differences, no multiplies.*/
    1.59 +  /*0-3 butterfly.*/
    1.60 +  r=t[0]+t[3];
    1.61 +  t[3]=t[0]-t[3];
    1.62 +  t[0]=r;
    1.63 +  /*1-2 butterfly.*/
    1.64 +  r=t[1]+t[2];
    1.65 +  t[2]=t[1]-t[2];
    1.66 +  t[1]=r;
    1.67 +  /*6-5 butterfly.*/
    1.68 +  r=t[6]+t[5];
    1.69 +  t[5]=t[6]-t[5];
    1.70 +  t[6]=r;
    1.71 +  /*Stage 4: outputs k and 7-k are the sum and difference of one pair.*/
    1.72 +  /*0-7 butterfly.*/
    1.73 +  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
    1.74 +  /*1-6 butterfly.*/
    1.75 +  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
    1.76 +  /*2-5 butterfly.*/
    1.77 +  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
    1.78 +  /*3-4 butterfly.*/
    1.79 +  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
    1.80 +  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); /*Difference half of 3-4.*/
    1.81 +  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); /*Difference half of 2-5.*/
    1.82 +  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); /*Difference half of 1-6.*/
    1.83 +  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); /*Difference half of 0-7.*/
    1.84 +}
    1.85 +
    1.86 +/*Performs an inverse 8 point Type-II DCT transform on an input whose last 4
    1.87 +   coefficients are zero.
    1.88 +  The output is scaled by a factor of 2 relative to the orthonormal version of
    1.89 +   the transform.
    1.90 +  _y: The buffer to store the result in.
    1.91 +      Output k is stored at _y[k<<3] (e.g., down a column of an 8x8 block).
    1.92 +  _x: The input coefficients.
    1.93 +      Only the first 4 entries are used.
    1.94 +      The other 4 are assumed to be 0.*/
    1.95 +static void idct8_4(ogg_int16_t *_y,const ogg_int16_t _x[8]){
    1.96 +  ogg_int32_t t[8];
    1.97 +  ogg_int32_t r;
    1.98 +  /*Stage 1: only _x[0].._x[3] are read, so every term is a single product.*/
    1.99 +  t[0]=OC_C4S4*_x[0]>>16;
   1.100 +  t[2]=OC_C6S2*_x[2]>>16;
   1.101 +  t[3]=OC_C2S6*_x[2]>>16;
   1.102 +  t[4]=OC_C7S1*_x[1]>>16;
   1.103 +  t[5]=-(OC_C5S3*_x[3]>>16);
   1.104 +  t[6]=OC_C3S5*_x[3]>>16;
   1.105 +  t[7]=OC_C1S7*_x[1]>>16;
   1.106 +  /*Stage 2: 4-5 and 7-6 butterflies (r keeps each sum until it is stored).*/
   1.107 +  r=t[4]+t[5];
   1.108 +  t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16;
   1.109 +  t[4]=r;
   1.110 +  r=t[7]+t[6];
   1.111 +  t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16;
   1.112 +  t[7]=r;
   1.113 +  /*Stage 3: t[1] was not set in stage 1; t[0] stands in for it below.*/
   1.114 +  t[1]=t[0]+t[2];
   1.115 +  t[2]=t[0]-t[2];
   1.116 +  r=t[0]+t[3];
   1.117 +  t[3]=t[0]-t[3];
   1.118 +  t[0]=r;
   1.119 +  r=t[6]+t[5];
   1.120 +  t[5]=t[6]-t[5];
   1.121 +  t[6]=r;
   1.122 +  /*Stage 4: outputs k and 7-k are the sum and difference of one pair.*/
   1.123 +  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
   1.124 +  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
   1.125 +  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
   1.126 +  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
   1.127 +  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
   1.128 +  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
   1.129 +  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
   1.130 +  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
   1.131 +}
   1.132 +
   1.133 +/*Performs an inverse 8 point Type-II DCT transform on an input whose last 5
   1.134 +   coefficients are zero.
   1.135 +  The output is scaled by a factor of 2 relative to the orthonormal version of
   1.136 +   the transform.
   1.137 +  _y: The buffer to store the result in.
   1.138 +      Output k is stored at _y[k<<3] (e.g., down a column of an 8x8 block).
   1.139 +  _x: The input coefficients.
   1.140 +      Only the first 3 entries are used.
   1.141 +      The other 5 are assumed to be 0.*/
   1.142 +static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){
   1.143 +  ogg_int32_t t[8];
   1.144 +  ogg_int32_t r;
   1.145 +  /*Stage 1: only _x[0].._x[2] are read; t[1], t[5] and t[6] are not set yet.*/
   1.146 +  t[0]=OC_C4S4*_x[0]>>16;
   1.147 +  t[2]=OC_C6S2*_x[2]>>16;
   1.148 +  t[3]=OC_C2S6*_x[2]>>16;
   1.149 +  t[4]=OC_C7S1*_x[1]>>16;
   1.150 +  t[7]=OC_C1S7*_x[1]>>16;
   1.151 +  /*Stage 2: t[5] and t[6] are t[4] and t[7] rescaled (no 16-bit cast here).*/
   1.152 +  t[5]=OC_C4S4*t[4]>>16;
   1.153 +  t[6]=OC_C4S4*t[7]>>16;
   1.154 +  /*Stage 3: t[1] was not set in stage 1; t[0] stands in for it below.*/
   1.155 +  t[1]=t[0]+t[2];
   1.156 +  t[2]=t[0]-t[2];
   1.157 +  r=t[0]+t[3];
   1.158 +  t[3]=t[0]-t[3];
   1.159 +  t[0]=r;
   1.160 +  r=t[6]+t[5];
   1.161 +  t[5]=t[6]-t[5];
   1.162 +  t[6]=r;
   1.163 +  /*Stage 4: outputs k and 7-k are the sum and difference of one pair.*/
   1.164 +  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
   1.165 +  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
   1.166 +  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
   1.167 +  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
   1.168 +  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
   1.169 +  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
   1.170 +  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
   1.171 +  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
   1.172 +}
   1.173 +
   1.174 +/*Performs an inverse 8 point Type-II DCT transform on an input whose last 6
   1.175 +   coefficients are zero.
   1.176 +  The output is scaled by a factor of 2 relative to the orthonormal version of
   1.177 +   the transform.
   1.178 +  _y: The buffer to store the result in.
   1.179 +      Output k is stored at _y[k<<3] (e.g., down a column of an 8x8 block).
   1.180 +  _x: The input coefficients.
   1.181 +      Only the first 2 entries are used.
   1.182 +      The other 6 are assumed to be 0.*/
   1.183 +static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){
   1.184 +  ogg_int32_t t[8];
   1.185 +  ogg_int32_t r;
   1.186 +  /*Stage 1: only _x[0] and _x[1] are read; t[1], t[2] and t[3] stay unused.*/
   1.187 +  t[0]=OC_C4S4*_x[0]>>16;
   1.188 +  t[4]=OC_C7S1*_x[1]>>16;
   1.189 +  t[7]=OC_C1S7*_x[1]>>16;
   1.190 +  /*Stage 2: t[5] and t[6] are t[4] and t[7] rescaled (no 16-bit cast here).*/
   1.191 +  t[5]=OC_C4S4*t[4]>>16;
   1.192 +  t[6]=OC_C4S4*t[7]>>16;
   1.193 +  /*Stage 3: only the 6-5 butterfly is left (r keeps the sum for t[6]).*/
   1.194 +  r=t[6]+t[5];
   1.195 +  t[5]=t[6]-t[5];
   1.196 +  t[6]=r;
   1.197 +  /*Stage 4: t[0] is the even half of every output; t[4..7] add or subtract.*/
   1.198 +  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
   1.199 +  _y[1<<3]=(ogg_int16_t)(t[0]+t[6]);
   1.200 +  _y[2<<3]=(ogg_int16_t)(t[0]+t[5]);
   1.201 +  _y[3<<3]=(ogg_int16_t)(t[0]+t[4]);
   1.202 +  _y[4<<3]=(ogg_int16_t)(t[0]-t[4]);
   1.203 +  _y[5<<3]=(ogg_int16_t)(t[0]-t[5]);
   1.204 +  _y[6<<3]=(ogg_int16_t)(t[0]-t[6]);
   1.205 +  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
   1.206 +}
   1.207 +
   1.208 +/*Inverse 8 point Type-II DCT transform of an input that has only its first
   1.209 +   coefficient set.
   1.210 +  The output is scaled by a factor of 2 relative to the orthonormal version of
   1.211 +   the transform.
   1.212 +  _y: The buffer to store the result in.
   1.213 +      One value is written to _y[0], _y[8], ..., _y[56] (e.g., a column of an
   1.214 +       8x8 block).
   1.215 +  _x: The input coefficients.
   1.216 +      Only the first entry is used; the other 7 are assumed to be 0.*/
   1.217 +static void idct8_1(ogg_int16_t *_y,const ogg_int16_t _x[1]){
   1.218 +  const ogg_int16_t dc=(ogg_int16_t)(OC_C4S4*_x[0]>>16);
   1.219 +  _y[0]=_y[8]=_y[16]=_y[24]=_y[32]=_y[40]=_y[48]=_y[56]=dc;
   1.220 +}
   1.221 +
   1.222 +/*Inverse 8x8 Type-II DCT transform for a block with at most 3 coefficients.
   1.223 +  The input is assumed to be scaled by a factor of 4 relative to orthonormal
   1.224 +   version of the transform.
   1.225 +  Only the positions marked x are read; every other one is treated as 0:
   1.226 +   x  x  0  0  0  0  0  0
   1.227 +   x  0  0  0  0  0  0  0
   1.228 +   0  0  0  0  0  0  0  0
   1.229 +   0  0  0  0  0  0  0  0
   1.230 +   0  0  0  0  0  0  0  0
   1.231 +   0  0  0  0  0  0  0  0
   1.232 +   0  0  0  0  0  0  0  0
   1.233 +   0  0  0  0  0  0  0  0
   1.234 +  _y: The buffer to store the result in.
   1.235 +      This may be the same as _x.
   1.236 +  _x: The input coefficients; the entries read are zeroed unless _x==_y.*/
   1.237 +static void oc_idct8x8_3(ogg_int16_t _y[64],ogg_int16_t _x[64]){
   1.238 +  ogg_int16_t tmp[64];
   1.239 +  int         k;
   1.240 +  /*Row pass: rows 0 and 1 of _x become columns 0 and 1 of tmp.*/
   1.241 +  idct8_2(&tmp[0],&_x[0]);
   1.242 +  idct8_1(&tmp[1],&_x[8]);
   1.243 +  /*Column pass: row k of tmp becomes column k of _y.*/
   1.244 +  for(k=0;k<8;k++)idct8_2(&_y[k],&tmp[k<<3]);
   1.245 +  /*Adjust for the scale factor: add 8, then shift right by 4.*/
   1.246 +  for(k=0;k<64;k++)_y[k]=(ogg_int16_t)((_y[k]+8)>>4);
   1.247 +  /*Not in place: zero what was read, ready for the next block (decoder only).*/
   1.248 +  if(_x!=_y){_x[0]=0;_x[1]=0;_x[8]=0;}
   1.249 +}
   1.250 +
   1.251 +/*Inverse 8x8 Type-II DCT transform for a block with at most 10 coefficients.
   1.252 +  The input is assumed to be scaled by a factor of 4 relative to orthonormal
   1.253 +   version of the transform.
   1.254 +  Only the positions marked x are read; every other one is treated as 0:
   1.255 +   x  x  x  x  0  0  0  0
   1.256 +   x  x  x  0  0  0  0  0
   1.257 +   x  x  0  0  0  0  0  0
   1.258 +   x  0  0  0  0  0  0  0
   1.259 +   0  0  0  0  0  0  0  0
   1.260 +   0  0  0  0  0  0  0  0
   1.261 +   0  0  0  0  0  0  0  0
   1.262 +   0  0  0  0  0  0  0  0
   1.263 +  _y: The buffer to store the result in.
   1.264 +      This may be the same as _x.
   1.265 +  _x: The input coefficients; the entries read are zeroed unless _x==_y.*/
   1.266 +static void oc_idct8x8_10(ogg_int16_t _y[64],ogg_int16_t _x[64]){
   1.267 +  ogg_int16_t tmp[64];
   1.268 +  int         k;
   1.269 +  /*Row pass: rows 0..3 of _x become columns 0..3 of tmp.*/
   1.270 +  idct8_4(&tmp[0],&_x[0]);
   1.271 +  idct8_3(&tmp[1],&_x[8]);
   1.272 +  idct8_2(&tmp[2],&_x[16]);
   1.273 +  idct8_1(&tmp[3],&_x[24]);
   1.274 +  /*Column pass: row k of tmp becomes column k of _y.*/
   1.275 +  for(k=0;k<8;k++)idct8_4(&_y[k],&tmp[k<<3]);
   1.276 +  /*Adjust for the scale factor: add 8, then shift right by 4.*/
   1.277 +  for(k=0;k<64;k++)_y[k]=(ogg_int16_t)((_y[k]+8)>>4);
   1.278 +  /*Not in place: zero what was read, ready for the next block (decoder only).*/
   1.279 +  if(_x!=_y){_x[0]=_x[1]=_x[2]=_x[3]=0;_x[8]=_x[9]=_x[10]=0;_x[16]=_x[17]=0;_x[24]=0;}
   1.280 +}
   1.281 +
   1.282 +/*Inverse 8x8 Type-II DCT transform that reads all 64 coefficients.
   1.283 +  The input is assumed to be scaled by a factor of 4 relative to orthonormal
   1.284 +   version of the transform.
   1.285 +  _y: The buffer to store the result in.
   1.286 +      This may be the same as _x.
   1.287 +  _x: The input coefficients; all 64 are zeroed afterwards unless _x==_y.*/
   1.288 +static void oc_idct8x8_slow(ogg_int16_t _y[64],ogg_int16_t _x[64]){
   1.289 +  ogg_int16_t tmp[64];
   1.290 +  int         k;
   1.291 +  /*Row pass: row k of _x becomes column k of tmp.*/
   1.292 +  for(k=0;k<8;k++)idct8(&tmp[k],&_x[k<<3]);
   1.293 +  /*Column pass: row k of tmp becomes column k of _y.*/
   1.294 +  for(k=0;k<8;k++)idct8(&_y[k],&tmp[k<<3]);
   1.295 +  /*Adjust for the scale factor: add 8, then shift right by 4.*/
   1.296 +  for(k=0;k<64;k++)_y[k]=(ogg_int16_t)((_y[k]+8)>>4);
   1.297 +  if(_x!=_y){k=64;while(k>0)_x[--k]=0;} /*Clear the input if not in place.*/
   1.298 +}
   1.299 +
   1.300 +/*Performs an inverse 8x8 Type-II DCT transform, dispatching on _last_zzi to
   1.301 +   the 3-coefficient, 10-coefficient or full routine.
   1.302 +  The input is assumed to be scaled by a factor of 4 relative to orthonormal.*/
   1.303 +void oc_idct8x8_c(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi){
   1.304 +  /*_last_zzi is not quite a count of the coefficients decoded for this
   1.305 +     block.
   1.306 +    It holds the value of zzi BEFORE the block's final token was decoded.
   1.307 +    Usually that token is an EOB token (the continuation of an EOB run from
   1.308 +     a previous block counts), and then it equals the coefficient count.
   1.309 +    If the final token was NOT an EOB token, but filled the block up with
   1.310 +     exactly 64 coefficients, _last_zzi will be less than 64.
   1.311 +    As long as that token was not a pure zero run, _last_zzi is at least 46,
   1.312 +     which leaves the choice made below unchanged.
   1.313 +    If that token WAS a pure zero run of length 63, however, _last_zzi is 1
   1.314 +     while 64 coefficients were decoded.
   1.315 +    The smallest case below is then taken, where the real coefficient count
   1.316 +     would not have selected it.
   1.317 +    Likewise a zero run of length 64 gives _last_zzi a value of 0, but the DC
   1.318 +     coefficient is still processed, as DC prediction may have made it
   1.319 +     non-zero.
   1.320 +    Convoluted as this is, it is arguably the correct behavior: a smaller
   1.321 +     transform can be used when the block ends with a long zero run instead
   1.322 +     of a normal EOB token.
   1.323 +    It could be smarter... multiple separate zero runs at the end of a block
   1.324 +     will fool it, but an encoder that generates these really deserves what
   1.325 +     it gets.
   1.326 +    Needless to say
   1.327 +     we inherited this approach from VP3.*/
   1.328 +  /*Choose the routine, testing the larger threshold first.*/
   1.329 +  if(_last_zzi>10)oc_idct8x8_slow(_y,_x);
   1.330 +  else if(_last_zzi>3)oc_idct8x8_10(_y,_x);
   1.331 +  else oc_idct8x8_3(_y,_x);
   1.332 +}

mercurial