media/libtheora/lib/idct.c

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /********************************************************************
michael@0 2 * *
michael@0 3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
michael@0 4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
michael@0 5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
michael@0 6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
michael@0 7 * *
michael@0 8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
michael@0 9 * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
michael@0 10 * *
michael@0 11 ********************************************************************
michael@0 12
michael@0 13 function:
michael@0 14 last mod: $Id: idct.c 17410 2010-09-21 21:53:48Z tterribe $
michael@0 15
michael@0 16 ********************************************************************/
michael@0 17
michael@0 18 #include <string.h>
michael@0 19 #include "internal.h"
michael@0 20 #include "dct.h"
michael@0 21
michael@0 22 /*Performs an inverse 8 point Type-II DCT transform.
michael@0 23 The output is scaled by a factor of 2 relative to the orthonormal version of
michael@0 24 the transform.
michael@0 25 _y: The buffer to store the result in.
michael@0 26 Data will be placed in every 8th entry (e.g., in a column of an 8x8
michael@0 27 block).
michael@0 28 _x: The input coefficients.
michael@0 29 The first 8 entries are used (e.g., from a row of an 8x8 block).*/
michael@0 30 static void idct8(ogg_int16_t *_y,const ogg_int16_t _x[8]){
michael@0 31 ogg_int32_t t[8];
michael@0 32 ogg_int32_t r;
michael@0 33 /*Stage 1:*/
michael@0 34 /*0-1 butterfly.*/
michael@0 35 t[0]=OC_C4S4*(ogg_int16_t)(_x[0]+_x[4])>>16;
michael@0 36 t[1]=OC_C4S4*(ogg_int16_t)(_x[0]-_x[4])>>16;
michael@0 37 /*2-3 rotation by 6pi/16.*/
michael@0 38 t[2]=(OC_C6S2*_x[2]>>16)-(OC_C2S6*_x[6]>>16);
michael@0 39 t[3]=(OC_C2S6*_x[2]>>16)+(OC_C6S2*_x[6]>>16);
michael@0 40 /*4-7 rotation by 7pi/16.*/
michael@0 41 t[4]=(OC_C7S1*_x[1]>>16)-(OC_C1S7*_x[7]>>16);
michael@0 42 /*5-6 rotation by 3pi/16.*/
michael@0 43 t[5]=(OC_C3S5*_x[5]>>16)-(OC_C5S3*_x[3]>>16);
michael@0 44 t[6]=(OC_C5S3*_x[5]>>16)+(OC_C3S5*_x[3]>>16);
michael@0 45 t[7]=(OC_C1S7*_x[1]>>16)+(OC_C7S1*_x[7]>>16);
michael@0 46 /*Stage 2:*/
michael@0 47 /*4-5 butterfly.*/
michael@0 48 r=t[4]+t[5];
michael@0 49 t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16;
michael@0 50 t[4]=r;
michael@0 51 /*7-6 butterfly.*/
michael@0 52 r=t[7]+t[6];
michael@0 53 t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16;
michael@0 54 t[7]=r;
michael@0 55 /*Stage 3:*/
michael@0 56 /*0-3 butterfly.*/
michael@0 57 r=t[0]+t[3];
michael@0 58 t[3]=t[0]-t[3];
michael@0 59 t[0]=r;
michael@0 60 /*1-2 butterfly.*/
michael@0 61 r=t[1]+t[2];
michael@0 62 t[2]=t[1]-t[2];
michael@0 63 t[1]=r;
michael@0 64 /*6-5 butterfly.*/
michael@0 65 r=t[6]+t[5];
michael@0 66 t[5]=t[6]-t[5];
michael@0 67 t[6]=r;
michael@0 68 /*Stage 4:*/
michael@0 69 /*0-7 butterfly.*/
michael@0 70 _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
michael@0 71 /*1-6 butterfly.*/
michael@0 72 _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
michael@0 73 /*2-5 butterfly.*/
michael@0 74 _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
michael@0 75 /*3-4 butterfly.*/
michael@0 76 _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
michael@0 77 _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
michael@0 78 _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
michael@0 79 _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
michael@0 80 _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
michael@0 81 }
michael@0 82
michael@0 83 /*Performs an inverse 8 point Type-II DCT transform.
michael@0 84 The output is scaled by a factor of 2 relative to the orthonormal version of
michael@0 85 the transform.
michael@0 86 _y: The buffer to store the result in.
michael@0 87 Data will be placed in every 8th entry (e.g., in a column of an 8x8
michael@0 88 block).
michael@0 89 _x: The input coefficients.
michael@0 90 Only the first 4 entries are used.
michael@0 91 The other 4 are assumed to be 0.*/
michael@0 92 static void idct8_4(ogg_int16_t *_y,const ogg_int16_t _x[8]){
michael@0 93 ogg_int32_t t[8];
michael@0 94 ogg_int32_t r;
michael@0 95 /*Stage 1:*/
michael@0 96 t[0]=OC_C4S4*_x[0]>>16;
michael@0 97 t[2]=OC_C6S2*_x[2]>>16;
michael@0 98 t[3]=OC_C2S6*_x[2]>>16;
michael@0 99 t[4]=OC_C7S1*_x[1]>>16;
michael@0 100 t[5]=-(OC_C5S3*_x[3]>>16);
michael@0 101 t[6]=OC_C3S5*_x[3]>>16;
michael@0 102 t[7]=OC_C1S7*_x[1]>>16;
michael@0 103 /*Stage 2:*/
michael@0 104 r=t[4]+t[5];
michael@0 105 t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16;
michael@0 106 t[4]=r;
michael@0 107 r=t[7]+t[6];
michael@0 108 t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16;
michael@0 109 t[7]=r;
michael@0 110 /*Stage 3:*/
michael@0 111 t[1]=t[0]+t[2];
michael@0 112 t[2]=t[0]-t[2];
michael@0 113 r=t[0]+t[3];
michael@0 114 t[3]=t[0]-t[3];
michael@0 115 t[0]=r;
michael@0 116 r=t[6]+t[5];
michael@0 117 t[5]=t[6]-t[5];
michael@0 118 t[6]=r;
michael@0 119 /*Stage 4:*/
michael@0 120 _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
michael@0 121 _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
michael@0 122 _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
michael@0 123 _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
michael@0 124 _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
michael@0 125 _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
michael@0 126 _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
michael@0 127 _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
michael@0 128 }
michael@0 129
michael@0 130 /*Performs an inverse 8 point Type-II DCT transform.
michael@0 131 The output is scaled by a factor of 2 relative to the orthonormal version of
michael@0 132 the transform.
michael@0 133 _y: The buffer to store the result in.
michael@0 134 Data will be placed in every 8th entry (e.g., in a column of an 8x8
michael@0 135 block).
michael@0 136 _x: The input coefficients.
michael@0 137 Only the first 3 entries are used.
michael@0 138 The other 5 are assumed to be 0.*/
michael@0 139 static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){
michael@0 140 ogg_int32_t t[8];
michael@0 141 ogg_int32_t r;
michael@0 142 /*Stage 1:*/
michael@0 143 t[0]=OC_C4S4*_x[0]>>16;
michael@0 144 t[2]=OC_C6S2*_x[2]>>16;
michael@0 145 t[3]=OC_C2S6*_x[2]>>16;
michael@0 146 t[4]=OC_C7S1*_x[1]>>16;
michael@0 147 t[7]=OC_C1S7*_x[1]>>16;
michael@0 148 /*Stage 2:*/
michael@0 149 t[5]=OC_C4S4*t[4]>>16;
michael@0 150 t[6]=OC_C4S4*t[7]>>16;
michael@0 151 /*Stage 3:*/
michael@0 152 t[1]=t[0]+t[2];
michael@0 153 t[2]=t[0]-t[2];
michael@0 154 r=t[0]+t[3];
michael@0 155 t[3]=t[0]-t[3];
michael@0 156 t[0]=r;
michael@0 157 r=t[6]+t[5];
michael@0 158 t[5]=t[6]-t[5];
michael@0 159 t[6]=r;
michael@0 160 /*Stage 4:*/
michael@0 161 _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
michael@0 162 _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
michael@0 163 _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
michael@0 164 _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
michael@0 165 _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
michael@0 166 _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
michael@0 167 _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
michael@0 168 _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
michael@0 169 }
michael@0 170
michael@0 171 /*Performs an inverse 8 point Type-II DCT transform.
michael@0 172 The output is scaled by a factor of 2 relative to the orthonormal version of
michael@0 173 the transform.
michael@0 174 _y: The buffer to store the result in.
michael@0 175 Data will be placed in every 8th entry (e.g., in a column of an 8x8
michael@0 176 block).
michael@0 177 _x: The input coefficients.
michael@0 178 Only the first 2 entries are used.
michael@0 179 The other 6 are assumed to be 0.*/
michael@0 180 static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){
michael@0 181 ogg_int32_t t[8];
michael@0 182 ogg_int32_t r;
michael@0 183 /*Stage 1:*/
michael@0 184 t[0]=OC_C4S4*_x[0]>>16;
michael@0 185 t[4]=OC_C7S1*_x[1]>>16;
michael@0 186 t[7]=OC_C1S7*_x[1]>>16;
michael@0 187 /*Stage 2:*/
michael@0 188 t[5]=OC_C4S4*t[4]>>16;
michael@0 189 t[6]=OC_C4S4*t[7]>>16;
michael@0 190 /*Stage 3:*/
michael@0 191 r=t[6]+t[5];
michael@0 192 t[5]=t[6]-t[5];
michael@0 193 t[6]=r;
michael@0 194 /*Stage 4:*/
michael@0 195 _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
michael@0 196 _y[1<<3]=(ogg_int16_t)(t[0]+t[6]);
michael@0 197 _y[2<<3]=(ogg_int16_t)(t[0]+t[5]);
michael@0 198 _y[3<<3]=(ogg_int16_t)(t[0]+t[4]);
michael@0 199 _y[4<<3]=(ogg_int16_t)(t[0]-t[4]);
michael@0 200 _y[5<<3]=(ogg_int16_t)(t[0]-t[5]);
michael@0 201 _y[6<<3]=(ogg_int16_t)(t[0]-t[6]);
michael@0 202 _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
michael@0 203 }
michael@0 204
michael@0 205 /*Performs an inverse 8 point Type-II DCT transform.
michael@0 206 The output is scaled by a factor of 2 relative to the orthonormal version of
michael@0 207 the transform.
michael@0 208 _y: The buffer to store the result in.
michael@0 209 Data will be placed in every 8th entry (e.g., in a column of an 8x8
michael@0 210 block).
michael@0 211 _x: The input coefficients.
michael@0 212 Only the first entry is used.
michael@0 213 The other 7 are assumed to be 0.*/
michael@0 214 static void idct8_1(ogg_int16_t *_y,const ogg_int16_t _x[1]){
michael@0 215 _y[0<<3]=_y[1<<3]=_y[2<<3]=_y[3<<3]=
michael@0 216 _y[4<<3]=_y[5<<3]=_y[6<<3]=_y[7<<3]=(ogg_int16_t)(OC_C4S4*_x[0]>>16);
michael@0 217 }
michael@0 218
michael@0 219 /*Performs an inverse 8x8 Type-II DCT transform.
michael@0 220 The input is assumed to be scaled by a factor of 4 relative to orthonormal
michael@0 221 version of the transform.
michael@0 222 All coefficients but the first 3 in zig-zag scan order are assumed to be 0:
michael@0 223 x x 0 0 0 0 0 0
michael@0 224 x 0 0 0 0 0 0 0
michael@0 225 0 0 0 0 0 0 0 0
michael@0 226 0 0 0 0 0 0 0 0
michael@0 227 0 0 0 0 0 0 0 0
michael@0 228 0 0 0 0 0 0 0 0
michael@0 229 0 0 0 0 0 0 0 0
michael@0 230 0 0 0 0 0 0 0 0
michael@0 231 _y: The buffer to store the result in.
michael@0 232 This may be the same as _x.
michael@0 233 _x: The input coefficients.*/
michael@0 234 static void oc_idct8x8_3(ogg_int16_t _y[64],ogg_int16_t _x[64]){
michael@0 235 ogg_int16_t w[64];
michael@0 236 int i;
michael@0 237 /*Transform rows of x into columns of w.*/
michael@0 238 idct8_2(w,_x);
michael@0 239 idct8_1(w+1,_x+8);
michael@0 240 /*Transform rows of w into columns of y.*/
michael@0 241 for(i=0;i<8;i++)idct8_2(_y+i,w+i*8);
michael@0 242 /*Adjust for the scale factor.*/
michael@0 243 for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4);
michael@0 244 /*Clear input data for next block (decoder only).*/
michael@0 245 if(_x!=_y)_x[0]=_x[1]=_x[8]=0;
michael@0 246 }
michael@0 247
michael@0 248 /*Performs an inverse 8x8 Type-II DCT transform.
michael@0 249 The input is assumed to be scaled by a factor of 4 relative to orthonormal
michael@0 250 version of the transform.
michael@0 251 All coefficients but the first 10 in zig-zag scan order are assumed to be 0:
michael@0 252 x x x x 0 0 0 0
michael@0 253 x x x 0 0 0 0 0
michael@0 254 x x 0 0 0 0 0 0
michael@0 255 x 0 0 0 0 0 0 0
michael@0 256 0 0 0 0 0 0 0 0
michael@0 257 0 0 0 0 0 0 0 0
michael@0 258 0 0 0 0 0 0 0 0
michael@0 259 0 0 0 0 0 0 0 0
michael@0 260 _y: The buffer to store the result in.
michael@0 261 This may be the same as _x.
michael@0 262 _x: The input coefficients.*/
michael@0 263 static void oc_idct8x8_10(ogg_int16_t _y[64],ogg_int16_t _x[64]){
michael@0 264 ogg_int16_t w[64];
michael@0 265 int i;
michael@0 266 /*Transform rows of x into columns of w.*/
michael@0 267 idct8_4(w,_x);
michael@0 268 idct8_3(w+1,_x+8);
michael@0 269 idct8_2(w+2,_x+16);
michael@0 270 idct8_1(w+3,_x+24);
michael@0 271 /*Transform rows of w into columns of y.*/
michael@0 272 for(i=0;i<8;i++)idct8_4(_y+i,w+i*8);
michael@0 273 /*Adjust for the scale factor.*/
michael@0 274 for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4);
michael@0 275 /*Clear input data for next block (decoder only).*/
michael@0 276 if(_x!=_y)_x[0]=_x[1]=_x[2]=_x[3]=_x[8]=_x[9]=_x[10]=_x[16]=_x[17]=_x[24]=0;
michael@0 277 }
michael@0 278
michael@0 279 /*Performs an inverse 8x8 Type-II DCT transform.
michael@0 280 The input is assumed to be scaled by a factor of 4 relative to orthonormal
michael@0 281 version of the transform.
michael@0 282 _y: The buffer to store the result in.
michael@0 283 This may be the same as _x.
michael@0 284 _x: The input coefficients.*/
michael@0 285 static void oc_idct8x8_slow(ogg_int16_t _y[64],ogg_int16_t _x[64]){
michael@0 286 ogg_int16_t w[64];
michael@0 287 int i;
michael@0 288 /*Transform rows of x into columns of w.*/
michael@0 289 for(i=0;i<8;i++)idct8(w+i,_x+i*8);
michael@0 290 /*Transform rows of w into columns of y.*/
michael@0 291 for(i=0;i<8;i++)idct8(_y+i,w+i*8);
michael@0 292 /*Adjust for the scale factor.*/
michael@0 293 for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4);
michael@0 294 if(_x!=_y)for(i=0;i<64;i++)_x[i]=0;
michael@0 295 }
michael@0 296
michael@0 297 /*Performs an inverse 8x8 Type-II DCT transform.
michael@0 298 The input is assumed to be scaled by a factor of 4 relative to orthonormal
michael@0 299 version of the transform.*/
michael@0 300 void oc_idct8x8_c(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi){
michael@0 301 /*_last_zzi is subtly different from an actual count of the number of
michael@0 302 coefficients we decoded for this block.
michael@0 303 It contains the value of zzi BEFORE the final token in the block was
michael@0 304 decoded.
michael@0 305 In most cases this is an EOB token (the continuation of an EOB run from a
michael@0 306 previous block counts), and so this is the same as the coefficient count.
michael@0 307 However, in the case that the last token was NOT an EOB token, but filled
michael@0 308 the block up with exactly 64 coefficients, _last_zzi will be less than 64.
michael@0 309 Provided the last token was not a pure zero run, the minimum value it can
michael@0 310 be is 46, and so that doesn't affect any of the cases in this routine.
michael@0 311 However, if the last token WAS a pure zero run of length 63, then _last_zzi
michael@0 312 will be 1 while the number of coefficients decoded is 64.
michael@0 313 Thus, we will trigger the following special case, where the real
michael@0 314 coefficient count would not.
michael@0 315 Note also that a zero run of length 64 will give _last_zzi a value of 0,
michael@0 316 but we still process the DC coefficient, which might have a non-zero value
michael@0 317 due to DC prediction.
michael@0 318 Although convoluted, this is arguably the correct behavior: it allows us to
michael@0 319 use a smaller transform when the block ends with a long zero run instead
michael@0 320 of a normal EOB token.
michael@0 321 It could be smarter... multiple separate zero runs at the end of a block
michael@0 322 will fool it, but an encoder that generates these really deserves what it
michael@0 323 gets.
michael@0 324 Needless to say we inherited this approach from VP3.*/
michael@0 325 /*Then perform the iDCT.*/
michael@0 326 if(_last_zzi<=3)oc_idct8x8_3(_y,_x);
michael@0 327 else if(_last_zzi<=10)oc_idct8x8_10(_y,_x);
michael@0 328 else oc_idct8x8_slow(_y,_x);
michael@0 329 }

mercurial