Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /******************************************************************** |
michael@0 | 2 | * * |
michael@0 | 3 | * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * |
michael@0 | 4 | * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * |
michael@0 | 5 | * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * |
michael@0 | 6 | * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * |
michael@0 | 7 | * * |
michael@0 | 8 | * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * |
michael@0 | 9 | * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * |
michael@0 | 10 | * * |
michael@0 | 11 | ******************************************************************** |
michael@0 | 12 | |
michael@0 | 13 | function: |
michael@0 | 14 | last mod: $Id: idct.c 17410 2010-09-21 21:53:48Z tterribe $ |
michael@0 | 15 | |
michael@0 | 16 | ********************************************************************/ |
michael@0 | 17 | |
michael@0 | 18 | #include <string.h> |
michael@0 | 19 | #include "internal.h" |
michael@0 | 20 | #include "dct.h" |
michael@0 | 21 | |
michael@0 | 22 | /*Performs an inverse 8 point Type-II DCT transform. |
michael@0 | 23 | The output is scaled by a factor of 2 relative to the orthonormal version of |
michael@0 | 24 | the transform. |
michael@0 | 25 | _y: The buffer to store the result in. |
michael@0 | 26 | Data will be placed in every 8th entry (e.g., in a column of an 8x8 |
michael@0 | 27 | block). |
michael@0 | 28 | _x: The input coefficients. |
michael@0 | 29 | The first 8 entries are used (e.g., from a row of an 8x8 block).*/ |
michael@0 | 30 | static void idct8(ogg_int16_t *_y,const ogg_int16_t _x[8]){ |
michael@0 | 31 | ogg_int32_t t[8]; |
michael@0 | 32 | ogg_int32_t r; |
michael@0 | 33 | /*Stage 1:*/ |
michael@0 | 34 | /*0-1 butterfly.*/ |
michael@0 | 35 | t[0]=OC_C4S4*(ogg_int16_t)(_x[0]+_x[4])>>16; |
michael@0 | 36 | t[1]=OC_C4S4*(ogg_int16_t)(_x[0]-_x[4])>>16; |
michael@0 | 37 | /*2-3 rotation by 6pi/16.*/ |
michael@0 | 38 | t[2]=(OC_C6S2*_x[2]>>16)-(OC_C2S6*_x[6]>>16); |
michael@0 | 39 | t[3]=(OC_C2S6*_x[2]>>16)+(OC_C6S2*_x[6]>>16); |
michael@0 | 40 | /*4-7 rotation by 7pi/16.*/ |
michael@0 | 41 | t[4]=(OC_C7S1*_x[1]>>16)-(OC_C1S7*_x[7]>>16); |
michael@0 | 42 | /*5-6 rotation by 3pi/16.*/ |
michael@0 | 43 | t[5]=(OC_C3S5*_x[5]>>16)-(OC_C5S3*_x[3]>>16); |
michael@0 | 44 | t[6]=(OC_C5S3*_x[5]>>16)+(OC_C3S5*_x[3]>>16); |
michael@0 | 45 | t[7]=(OC_C1S7*_x[1]>>16)+(OC_C7S1*_x[7]>>16); |
michael@0 | 46 | /*Stage 2:*/ |
michael@0 | 47 | /*4-5 butterfly.*/ |
michael@0 | 48 | r=t[4]+t[5]; |
michael@0 | 49 | t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16; |
michael@0 | 50 | t[4]=r; |
michael@0 | 51 | /*7-6 butterfly.*/ |
michael@0 | 52 | r=t[7]+t[6]; |
michael@0 | 53 | t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16; |
michael@0 | 54 | t[7]=r; |
michael@0 | 55 | /*Stage 3:*/ |
michael@0 | 56 | /*0-3 butterfly.*/ |
michael@0 | 57 | r=t[0]+t[3]; |
michael@0 | 58 | t[3]=t[0]-t[3]; |
michael@0 | 59 | t[0]=r; |
michael@0 | 60 | /*1-2 butterfly.*/ |
michael@0 | 61 | r=t[1]+t[2]; |
michael@0 | 62 | t[2]=t[1]-t[2]; |
michael@0 | 63 | t[1]=r; |
michael@0 | 64 | /*6-5 butterfly.*/ |
michael@0 | 65 | r=t[6]+t[5]; |
michael@0 | 66 | t[5]=t[6]-t[5]; |
michael@0 | 67 | t[6]=r; |
michael@0 | 68 | /*Stage 4:*/ |
michael@0 | 69 | /*0-7 butterfly.*/ |
michael@0 | 70 | _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); |
michael@0 | 71 | /*1-6 butterfly.*/ |
michael@0 | 72 | _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); |
michael@0 | 73 | /*2-5 butterfly.*/ |
michael@0 | 74 | _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); |
michael@0 | 75 | /*3-4 butterfly.*/ |
michael@0 | 76 | _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); |
michael@0 | 77 | _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); |
michael@0 | 78 | _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); |
michael@0 | 79 | _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); |
michael@0 | 80 | _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); |
michael@0 | 81 | } |
michael@0 | 82 | |
michael@0 | 83 | /*Performs an inverse 8 point Type-II DCT transform. |
michael@0 | 84 | The output is scaled by a factor of 2 relative to the orthonormal version of |
michael@0 | 85 | the transform. |
michael@0 | 86 | _y: The buffer to store the result in. |
michael@0 | 87 | Data will be placed in every 8th entry (e.g., in a column of an 8x8 |
michael@0 | 88 | block). |
michael@0 | 89 | _x: The input coefficients. |
michael@0 | 90 | Only the first 4 entries are used. |
michael@0 | 91 | The other 4 are assumed to be 0.*/ |
michael@0 | 92 | static void idct8_4(ogg_int16_t *_y,const ogg_int16_t _x[8]){ |
michael@0 | 93 | ogg_int32_t t[8]; |
michael@0 | 94 | ogg_int32_t r; |
michael@0 | 95 | /*Stage 1:*/ |
michael@0 | 96 | t[0]=OC_C4S4*_x[0]>>16; |
michael@0 | 97 | t[2]=OC_C6S2*_x[2]>>16; |
michael@0 | 98 | t[3]=OC_C2S6*_x[2]>>16; |
michael@0 | 99 | t[4]=OC_C7S1*_x[1]>>16; |
michael@0 | 100 | t[5]=-(OC_C5S3*_x[3]>>16); |
michael@0 | 101 | t[6]=OC_C3S5*_x[3]>>16; |
michael@0 | 102 | t[7]=OC_C1S7*_x[1]>>16; |
michael@0 | 103 | /*Stage 2:*/ |
michael@0 | 104 | r=t[4]+t[5]; |
michael@0 | 105 | t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16; |
michael@0 | 106 | t[4]=r; |
michael@0 | 107 | r=t[7]+t[6]; |
michael@0 | 108 | t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16; |
michael@0 | 109 | t[7]=r; |
michael@0 | 110 | /*Stage 3:*/ |
michael@0 | 111 | t[1]=t[0]+t[2]; |
michael@0 | 112 | t[2]=t[0]-t[2]; |
michael@0 | 113 | r=t[0]+t[3]; |
michael@0 | 114 | t[3]=t[0]-t[3]; |
michael@0 | 115 | t[0]=r; |
michael@0 | 116 | r=t[6]+t[5]; |
michael@0 | 117 | t[5]=t[6]-t[5]; |
michael@0 | 118 | t[6]=r; |
michael@0 | 119 | /*Stage 4:*/ |
michael@0 | 120 | _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); |
michael@0 | 121 | _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); |
michael@0 | 122 | _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); |
michael@0 | 123 | _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); |
michael@0 | 124 | _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); |
michael@0 | 125 | _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); |
michael@0 | 126 | _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); |
michael@0 | 127 | _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); |
michael@0 | 128 | } |
michael@0 | 129 | |
michael@0 | 130 | /*Performs an inverse 8 point Type-II DCT transform. |
michael@0 | 131 | The output is scaled by a factor of 2 relative to the orthonormal version of |
michael@0 | 132 | the transform. |
michael@0 | 133 | _y: The buffer to store the result in. |
michael@0 | 134 | Data will be placed in every 8th entry (e.g., in a column of an 8x8 |
michael@0 | 135 | block). |
michael@0 | 136 | _x: The input coefficients. |
michael@0 | 137 | Only the first 3 entries are used. |
michael@0 | 138 | The other 5 are assumed to be 0.*/ |
michael@0 | 139 | static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){ |
michael@0 | 140 | ogg_int32_t t[8]; |
michael@0 | 141 | ogg_int32_t r; |
michael@0 | 142 | /*Stage 1:*/ |
michael@0 | 143 | t[0]=OC_C4S4*_x[0]>>16; |
michael@0 | 144 | t[2]=OC_C6S2*_x[2]>>16; |
michael@0 | 145 | t[3]=OC_C2S6*_x[2]>>16; |
michael@0 | 146 | t[4]=OC_C7S1*_x[1]>>16; |
michael@0 | 147 | t[7]=OC_C1S7*_x[1]>>16; |
michael@0 | 148 | /*Stage 2:*/ |
michael@0 | 149 | t[5]=OC_C4S4*t[4]>>16; |
michael@0 | 150 | t[6]=OC_C4S4*t[7]>>16; |
michael@0 | 151 | /*Stage 3:*/ |
michael@0 | 152 | t[1]=t[0]+t[2]; |
michael@0 | 153 | t[2]=t[0]-t[2]; |
michael@0 | 154 | r=t[0]+t[3]; |
michael@0 | 155 | t[3]=t[0]-t[3]; |
michael@0 | 156 | t[0]=r; |
michael@0 | 157 | r=t[6]+t[5]; |
michael@0 | 158 | t[5]=t[6]-t[5]; |
michael@0 | 159 | t[6]=r; |
michael@0 | 160 | /*Stage 4:*/ |
michael@0 | 161 | _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); |
michael@0 | 162 | _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); |
michael@0 | 163 | _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); |
michael@0 | 164 | _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); |
michael@0 | 165 | _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); |
michael@0 | 166 | _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); |
michael@0 | 167 | _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); |
michael@0 | 168 | _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); |
michael@0 | 169 | } |
michael@0 | 170 | |
michael@0 | 171 | /*Performs an inverse 8 point Type-II DCT transform. |
michael@0 | 172 | The output is scaled by a factor of 2 relative to the orthonormal version of |
michael@0 | 173 | the transform. |
michael@0 | 174 | _y: The buffer to store the result in. |
michael@0 | 175 | Data will be placed in every 8th entry (e.g., in a column of an 8x8 |
michael@0 | 176 | block). |
michael@0 | 177 | _x: The input coefficients. |
michael@0 | 178 | Only the first 2 entries are used. |
michael@0 | 179 | The other 6 are assumed to be 0.*/ |
michael@0 | 180 | static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){ |
michael@0 | 181 | ogg_int32_t t[8]; |
michael@0 | 182 | ogg_int32_t r; |
michael@0 | 183 | /*Stage 1:*/ |
michael@0 | 184 | t[0]=OC_C4S4*_x[0]>>16; |
michael@0 | 185 | t[4]=OC_C7S1*_x[1]>>16; |
michael@0 | 186 | t[7]=OC_C1S7*_x[1]>>16; |
michael@0 | 187 | /*Stage 2:*/ |
michael@0 | 188 | t[5]=OC_C4S4*t[4]>>16; |
michael@0 | 189 | t[6]=OC_C4S4*t[7]>>16; |
michael@0 | 190 | /*Stage 3:*/ |
michael@0 | 191 | r=t[6]+t[5]; |
michael@0 | 192 | t[5]=t[6]-t[5]; |
michael@0 | 193 | t[6]=r; |
michael@0 | 194 | /*Stage 4:*/ |
michael@0 | 195 | _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); |
michael@0 | 196 | _y[1<<3]=(ogg_int16_t)(t[0]+t[6]); |
michael@0 | 197 | _y[2<<3]=(ogg_int16_t)(t[0]+t[5]); |
michael@0 | 198 | _y[3<<3]=(ogg_int16_t)(t[0]+t[4]); |
michael@0 | 199 | _y[4<<3]=(ogg_int16_t)(t[0]-t[4]); |
michael@0 | 200 | _y[5<<3]=(ogg_int16_t)(t[0]-t[5]); |
michael@0 | 201 | _y[6<<3]=(ogg_int16_t)(t[0]-t[6]); |
michael@0 | 202 | _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); |
michael@0 | 203 | } |
michael@0 | 204 | |
michael@0 | 205 | /*Performs an inverse 8 point Type-II DCT transform. |
michael@0 | 206 | The output is scaled by a factor of 2 relative to the orthonormal version of |
michael@0 | 207 | the transform. |
michael@0 | 208 | _y: The buffer to store the result in. |
michael@0 | 209 | Data will be placed in every 8th entry (e.g., in a column of an 8x8 |
michael@0 | 210 | block). |
michael@0 | 211 | _x: The input coefficients. |
michael@0 | 212 | Only the first entry is used. |
michael@0 | 213 | The other 7 are assumed to be 0.*/ |
michael@0 | 214 | static void idct8_1(ogg_int16_t *_y,const ogg_int16_t _x[1]){ |
michael@0 | 215 | _y[0<<3]=_y[1<<3]=_y[2<<3]=_y[3<<3]= |
michael@0 | 216 | _y[4<<3]=_y[5<<3]=_y[6<<3]=_y[7<<3]=(ogg_int16_t)(OC_C4S4*_x[0]>>16); |
michael@0 | 217 | } |
michael@0 | 218 | |
michael@0 | 219 | /*Performs an inverse 8x8 Type-II DCT transform. |
michael@0 | 220 | The input is assumed to be scaled by a factor of 4 relative to orthonormal |
michael@0 | 221 | version of the transform. |
michael@0 | 222 | All coefficients but the first 3 in zig-zag scan order are assumed to be 0: |
michael@0 | 223 | x x 0 0 0 0 0 0 |
michael@0 | 224 | x 0 0 0 0 0 0 0 |
michael@0 | 225 | 0 0 0 0 0 0 0 0 |
michael@0 | 226 | 0 0 0 0 0 0 0 0 |
michael@0 | 227 | 0 0 0 0 0 0 0 0 |
michael@0 | 228 | 0 0 0 0 0 0 0 0 |
michael@0 | 229 | 0 0 0 0 0 0 0 0 |
michael@0 | 230 | 0 0 0 0 0 0 0 0 |
michael@0 | 231 | _y: The buffer to store the result in. |
michael@0 | 232 | This may be the same as _x. |
michael@0 | 233 | _x: The input coefficients.*/ |
michael@0 | 234 | static void oc_idct8x8_3(ogg_int16_t _y[64],ogg_int16_t _x[64]){ |
michael@0 | 235 | ogg_int16_t w[64]; |
michael@0 | 236 | int i; |
michael@0 | 237 | /*Transform rows of x into columns of w.*/ |
michael@0 | 238 | idct8_2(w,_x); |
michael@0 | 239 | idct8_1(w+1,_x+8); |
michael@0 | 240 | /*Transform rows of w into columns of y.*/ |
michael@0 | 241 | for(i=0;i<8;i++)idct8_2(_y+i,w+i*8); |
michael@0 | 242 | /*Adjust for the scale factor.*/ |
michael@0 | 243 | for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4); |
michael@0 | 244 | /*Clear input data for next block (decoder only).*/ |
michael@0 | 245 | if(_x!=_y)_x[0]=_x[1]=_x[8]=0; |
michael@0 | 246 | } |
michael@0 | 247 | |
michael@0 | 248 | /*Performs an inverse 8x8 Type-II DCT transform. |
michael@0 | 249 | The input is assumed to be scaled by a factor of 4 relative to orthonormal |
michael@0 | 250 | version of the transform. |
michael@0 | 251 | All coefficients but the first 10 in zig-zag scan order are assumed to be 0: |
michael@0 | 252 | x x x x 0 0 0 0 |
michael@0 | 253 | x x x 0 0 0 0 0 |
michael@0 | 254 | x x 0 0 0 0 0 0 |
michael@0 | 255 | x 0 0 0 0 0 0 0 |
michael@0 | 256 | 0 0 0 0 0 0 0 0 |
michael@0 | 257 | 0 0 0 0 0 0 0 0 |
michael@0 | 258 | 0 0 0 0 0 0 0 0 |
michael@0 | 259 | 0 0 0 0 0 0 0 0 |
michael@0 | 260 | _y: The buffer to store the result in. |
michael@0 | 261 | This may be the same as _x. |
michael@0 | 262 | _x: The input coefficients.*/ |
michael@0 | 263 | static void oc_idct8x8_10(ogg_int16_t _y[64],ogg_int16_t _x[64]){ |
michael@0 | 264 | ogg_int16_t w[64]; |
michael@0 | 265 | int i; |
michael@0 | 266 | /*Transform rows of x into columns of w.*/ |
michael@0 | 267 | idct8_4(w,_x); |
michael@0 | 268 | idct8_3(w+1,_x+8); |
michael@0 | 269 | idct8_2(w+2,_x+16); |
michael@0 | 270 | idct8_1(w+3,_x+24); |
michael@0 | 271 | /*Transform rows of w into columns of y.*/ |
michael@0 | 272 | for(i=0;i<8;i++)idct8_4(_y+i,w+i*8); |
michael@0 | 273 | /*Adjust for the scale factor.*/ |
michael@0 | 274 | for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4); |
michael@0 | 275 | /*Clear input data for next block (decoder only).*/ |
michael@0 | 276 | if(_x!=_y)_x[0]=_x[1]=_x[2]=_x[3]=_x[8]=_x[9]=_x[10]=_x[16]=_x[17]=_x[24]=0; |
michael@0 | 277 | } |
michael@0 | 278 | |
michael@0 | 279 | /*Performs an inverse 8x8 Type-II DCT transform. |
michael@0 | 280 | The input is assumed to be scaled by a factor of 4 relative to orthonormal |
michael@0 | 281 | version of the transform. |
michael@0 | 282 | _y: The buffer to store the result in. |
michael@0 | 283 | This may be the same as _x. |
michael@0 | 284 | _x: The input coefficients.*/ |
michael@0 | 285 | static void oc_idct8x8_slow(ogg_int16_t _y[64],ogg_int16_t _x[64]){ |
michael@0 | 286 | ogg_int16_t w[64]; |
michael@0 | 287 | int i; |
michael@0 | 288 | /*Transform rows of x into columns of w.*/ |
michael@0 | 289 | for(i=0;i<8;i++)idct8(w+i,_x+i*8); |
michael@0 | 290 | /*Transform rows of w into columns of y.*/ |
michael@0 | 291 | for(i=0;i<8;i++)idct8(_y+i,w+i*8); |
michael@0 | 292 | /*Adjust for the scale factor.*/ |
michael@0 | 293 | for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4); |
michael@0 | 294 | if(_x!=_y)for(i=0;i<64;i++)_x[i]=0; |
michael@0 | 295 | } |
michael@0 | 296 | |
michael@0 | 297 | /*Performs an inverse 8x8 Type-II DCT transform. |
michael@0 | 298 | The input is assumed to be scaled by a factor of 4 relative to orthonormal |
michael@0 | 299 | version of the transform.*/ |
michael@0 | 300 | void oc_idct8x8_c(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi){ |
michael@0 | 301 | /*_last_zzi is subtly different from an actual count of the number of |
michael@0 | 302 | coefficients we decoded for this block. |
michael@0 | 303 | It contains the value of zzi BEFORE the final token in the block was |
michael@0 | 304 | decoded. |
michael@0 | 305 | In most cases this is an EOB token (the continuation of an EOB run from a |
michael@0 | 306 | previous block counts), and so this is the same as the coefficient count. |
michael@0 | 307 | However, in the case that the last token was NOT an EOB token, but filled |
michael@0 | 308 | the block up with exactly 64 coefficients, _last_zzi will be less than 64. |
michael@0 | 309 | Provided the last token was not a pure zero run, the minimum value it can |
michael@0 | 310 | be is 46, and so that doesn't affect any of the cases in this routine. |
michael@0 | 311 | However, if the last token WAS a pure zero run of length 63, then _last_zzi |
michael@0 | 312 | will be 1 while the number of coefficients decoded is 64. |
michael@0 | 313 | Thus, we will trigger the following special case, where the real |
michael@0 | 314 | coefficient count would not. |
michael@0 | 315 | Note also that a zero run of length 64 will give _last_zzi a value of 0, |
michael@0 | 316 | but we still process the DC coefficient, which might have a non-zero value |
michael@0 | 317 | due to DC prediction. |
michael@0 | 318 | Although convoluted, this is arguably the correct behavior: it allows us to |
michael@0 | 319 | use a smaller transform when the block ends with a long zero run instead |
michael@0 | 320 | of a normal EOB token. |
michael@0 | 321 | It could be smarter... multiple separate zero runs at the end of a block |
michael@0 | 322 | will fool it, but an encoder that generates these really deserves what it |
michael@0 | 323 | gets. |
michael@0 | 324 | Needless to say we inherited this approach from VP3.*/ |
michael@0 | 325 | /*Then perform the iDCT.*/ |
michael@0 | 326 | if(_last_zzi<=3)oc_idct8x8_3(_y,_x); |
michael@0 | 327 | else if(_last_zzi<=10)oc_idct8x8_10(_y,_x); |
michael@0 | 328 | else oc_idct8x8_slow(_y,_x); |
michael@0 | 329 | } |