1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libjpeg/jidctint.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,2623 @@ 1.4 +/* 1.5 + * jidctint.c 1.6 + * 1.7 + * Copyright (C) 1991-1998, Thomas G. Lane. 1.8 + * Modification developed 2002-2009 by Guido Vollbeding. 1.9 + * This file is part of the Independent JPEG Group's software. 1.10 + * For conditions of distribution and use, see the accompanying README file. 1.11 + * 1.12 + * This file contains a slow-but-accurate integer implementation of the 1.13 + * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine 1.14 + * must also perform dequantization of the input coefficients. 1.15 + * 1.16 + * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT 1.17 + * on each row (or vice versa, but it's more convenient to emit a row at 1.18 + * a time). Direct algorithms are also available, but they are much more 1.19 + * complex and seem not to be any faster when reduced to code. 1.20 + * 1.21 + * This implementation is based on an algorithm described in 1.22 + * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT 1.23 + * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, 1.24 + * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. 1.25 + * The primary algorithm described there uses 11 multiplies and 29 adds. 1.26 + * We use their alternate method with 12 multiplies and 32 adds. 1.27 + * The advantage of this method is that no data path contains more than one 1.28 + * multiplication; this allows a very simple and accurate implementation in 1.29 + * scaled fixed-point arithmetic, with a minimal number of shifts. 1.30 + * 1.31 + * We also provide IDCT routines with various output sample block sizes for 1.32 + * direct resolution reduction or enlargement without additional resampling: 1.33 + * NxN (N=1...16) pixels for one 8x8 input DCT block. 
1.34 + * 1.35 + * For N<8 we simply take the corresponding low-frequency coefficients of 1.36 + * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block 1.37 + * to yield the downscaled outputs. 1.38 + * This can be seen as direct low-pass downsampling from the DCT domain 1.39 + * point of view rather than the usual spatial domain point of view, 1.40 + * yielding significant computational savings and results at least 1.41 + * as good as common bilinear (averaging) spatial downsampling. 1.42 + * 1.43 + * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as 1.44 + * lower frequencies and higher frequencies assumed to be zero. 1.45 + * It turns out that the computational effort is similar to the 8x8 IDCT 1.46 + * regarding the output size. 1.47 + * Furthermore, the scaling and descaling is the same for all IDCT sizes. 1.48 + * 1.49 + * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases 1.50 + * since there would be too many additional constants to pre-calculate. 1.51 + */ 1.52 + 1.53 +#define JPEG_INTERNALS 1.54 +#include "jinclude.h" 1.55 +#include "jpeglib.h" 1.56 +#include "jdct.h" /* Private declarations for DCT subsystem */ 1.57 + 1.58 +#ifdef DCT_ISLOW_SUPPORTED 1.59 + 1.60 + 1.61 +/* 1.62 + * This module is specialized to the case DCTSIZE = 8. 1.63 + */ 1.64 + 1.65 +#if DCTSIZE != 8 1.66 + Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */ 1.67 +#endif 1.68 + 1.69 + 1.70 +/* 1.71 + * The poop on this scaling stuff is as follows: 1.72 + * 1.73 + * Each 1-D IDCT step produces outputs which are a factor of sqrt(N) 1.74 + * larger than the true IDCT outputs. The final outputs are therefore 1.75 + * a factor of N larger than desired; since N=8 this can be cured by 1.76 + * a simple right shift at the end of the algorithm. The advantage of 1.77 + * this arrangement is that we save two multiplications per 1-D IDCT, 1.78 + * because the y0 and y4 inputs need not be divided by sqrt(N). 
1.79 + * 1.80 + * We have to do addition and subtraction of the integer inputs, which 1.81 + * is no problem, and multiplication by fractional constants, which is 1.82 + * a problem to do in integer arithmetic. We multiply all the constants 1.83 + * by CONST_SCALE and convert them to integer constants (thus retaining 1.84 + * CONST_BITS bits of precision in the constants). After doing a 1.85 + * multiplication we have to divide the product by CONST_SCALE, with proper 1.86 + * rounding, to produce the correct output. This division can be done 1.87 + * cheaply as a right shift of CONST_BITS bits. We postpone shifting 1.88 + * as long as possible so that partial sums can be added together with 1.89 + * full fractional precision. 1.90 + * 1.91 + * The outputs of the first pass are scaled up by PASS1_BITS bits so that 1.92 + * they are represented to better-than-integral precision. These outputs 1.93 + * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word 1.94 + * with the recommended scaling. (To scale up 12-bit sample data further, an 1.95 + * intermediate INT32 array would be needed.) 1.96 + * 1.97 + * To avoid overflow of the 32-bit intermediate results in pass 2, we must 1.98 + * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis 1.99 + * shows that the values given below are the most effective. 1.100 + */ 1.101 + 1.102 +#if BITS_IN_JSAMPLE == 8 1.103 +#define CONST_BITS 13 1.104 +#define PASS1_BITS 2 1.105 +#else 1.106 +#define CONST_BITS 13 1.107 +#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ 1.108 +#endif 1.109 + 1.110 +/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus 1.111 + * causing a lot of useless floating-point operations at run time. 1.112 + * To get around this we use the following pre-calculated constants. 1.113 + * If you change CONST_BITS you may want to add appropriate values. 1.114 + * (With a reasonable C compiler, you can just rely on the FIX() macro...) 
1.115 + */ 1.116 + 1.117 +#if CONST_BITS == 13 1.118 +#define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */ 1.119 +#define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */ 1.120 +#define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */ 1.121 +#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */ 1.122 +#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */ 1.123 +#define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */ 1.124 +#define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */ 1.125 +#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */ 1.126 +#define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */ 1.127 +#define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */ 1.128 +#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */ 1.129 +#define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */ 1.130 +#else 1.131 +#define FIX_0_298631336 FIX(0.298631336) 1.132 +#define FIX_0_390180644 FIX(0.390180644) 1.133 +#define FIX_0_541196100 FIX(0.541196100) 1.134 +#define FIX_0_765366865 FIX(0.765366865) 1.135 +#define FIX_0_899976223 FIX(0.899976223) 1.136 +#define FIX_1_175875602 FIX(1.175875602) 1.137 +#define FIX_1_501321110 FIX(1.501321110) 1.138 +#define FIX_1_847759065 FIX(1.847759065) 1.139 +#define FIX_1_961570560 FIX(1.961570560) 1.140 +#define FIX_2_053119869 FIX(2.053119869) 1.141 +#define FIX_2_562915447 FIX(2.562915447) 1.142 +#define FIX_3_072711026 FIX(3.072711026) 1.143 +#endif 1.144 + 1.145 + 1.146 +/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. 1.147 + * For 8-bit samples with the recommended scaling, all the variable 1.148 + * and constant values involved are no more than 16 bits wide, so a 1.149 + * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. 1.150 + * For 12-bit samples, a full 32-bit multiplication will be needed. 
1.151 + */ 1.152 + 1.153 +#if BITS_IN_JSAMPLE == 8 1.154 +#define MULTIPLY(var,const) MULTIPLY16C16(var,const) 1.155 +#else 1.156 +#define MULTIPLY(var,const) ((var) * (const)) 1.157 +#endif 1.158 + 1.159 + 1.160 +/* Dequantize a coefficient by multiplying it by the multiplier-table 1.161 + * entry; produce an int result. In this module, both inputs and result 1.162 + * are 16 bits or less, so either int or short multiply will work. 1.163 + */ 1.164 + 1.165 +#define DEQUANTIZE(coef,quantval) (((ISLOW_MULT_TYPE) (coef)) * (quantval)) 1.166 + 1.167 + 1.168 +/* 1.169 + * Perform dequantization and inverse DCT on one block of coefficients. 1.170 + */ 1.171 + 1.172 +GLOBAL(void) 1.173 +jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.174 + JCOEFPTR coef_block, 1.175 + JSAMPARRAY output_buf, JDIMENSION output_col) 1.176 +{ 1.177 + INT32 tmp0, tmp1, tmp2, tmp3; 1.178 + INT32 tmp10, tmp11, tmp12, tmp13; 1.179 + INT32 z1, z2, z3, z4, z5; 1.180 + JCOEFPTR inptr; 1.181 + ISLOW_MULT_TYPE * quantptr; 1.182 + int * wsptr; 1.183 + JSAMPROW outptr; 1.184 + JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1.185 + int ctr; 1.186 + int workspace[DCTSIZE2]; /* buffers data between passes */ 1.187 + SHIFT_TEMPS 1.188 + 1.189 + /* Pass 1: process columns from input, store into work array. */ 1.190 + /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ 1.191 + /* furthermore, we scale the results by 2**PASS1_BITS. */ 1.192 + 1.193 + inptr = coef_block; 1.194 + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1.195 + wsptr = workspace; 1.196 + for (ctr = DCTSIZE; ctr > 0; ctr--) { 1.197 + /* Due to quantization, we will usually find that many of the input 1.198 + * coefficients are zero, especially the AC terms. We can exploit this 1.199 + * by short-circuiting the IDCT calculation for any column in which all 1.200 + * the AC terms are zero. In that case each output is equal to the 1.201 + * DC coefficient (with scale factor as needed). 
1.202 + * With typical images and quantization tables, half or more of the 1.203 + * column DCT calculations can be simplified this way. 1.204 + */ 1.205 + 1.206 + if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && 1.207 + inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && 1.208 + inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && 1.209 + inptr[DCTSIZE*7] == 0) { 1.210 + /* AC terms all zero */ 1.211 + int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; 1.212 + 1.213 + wsptr[DCTSIZE*0] = dcval; 1.214 + wsptr[DCTSIZE*1] = dcval; 1.215 + wsptr[DCTSIZE*2] = dcval; 1.216 + wsptr[DCTSIZE*3] = dcval; 1.217 + wsptr[DCTSIZE*4] = dcval; 1.218 + wsptr[DCTSIZE*5] = dcval; 1.219 + wsptr[DCTSIZE*6] = dcval; 1.220 + wsptr[DCTSIZE*7] = dcval; 1.221 + 1.222 + inptr++; /* advance pointers to next column */ 1.223 + quantptr++; 1.224 + wsptr++; 1.225 + continue; 1.226 + } 1.227 + 1.228 + /* Even part: reverse the even part of the forward DCT. */ 1.229 + /* The rotator is sqrt(2)*c(-6). */ 1.230 + 1.231 + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1.232 + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1.233 + 1.234 + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); 1.235 + tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); 1.236 + tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); 1.237 + 1.238 + z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1.239 + z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1.240 + 1.241 + tmp0 = (z2 + z3) << CONST_BITS; 1.242 + tmp1 = (z2 - z3) << CONST_BITS; 1.243 + 1.244 + tmp10 = tmp0 + tmp3; 1.245 + tmp13 = tmp0 - tmp3; 1.246 + tmp11 = tmp1 + tmp2; 1.247 + tmp12 = tmp1 - tmp2; 1.248 + 1.249 + /* Odd part per figure 8; the matrix is unitary and hence its 1.250 + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 
1.251 + */ 1.252 + 1.253 + tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1.254 + tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1.255 + tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1.256 + tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1.257 + 1.258 + z1 = tmp0 + tmp3; 1.259 + z2 = tmp1 + tmp2; 1.260 + z3 = tmp0 + tmp2; 1.261 + z4 = tmp1 + tmp3; 1.262 + z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ 1.263 + 1.264 + tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ 1.265 + tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ 1.266 + tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ 1.267 + tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ 1.268 + z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ 1.269 + z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ 1.270 + z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ 1.271 + z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ 1.272 + 1.273 + z3 += z5; 1.274 + z4 += z5; 1.275 + 1.276 + tmp0 += z1 + z3; 1.277 + tmp1 += z2 + z4; 1.278 + tmp2 += z2 + z3; 1.279 + tmp3 += z1 + z4; 1.280 + 1.281 + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 1.282 + 1.283 + wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); 1.284 + wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); 1.285 + wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); 1.286 + wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); 1.287 + wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); 1.288 + wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); 1.289 + wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); 1.290 + wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); 1.291 + 1.292 + inptr++; /* advance pointers to next column 
*/ 1.293 + quantptr++; 1.294 + wsptr++; 1.295 + } 1.296 + 1.297 + /* Pass 2: process rows from work array, store into output array. */ 1.298 + /* Note that we must descale the results by a factor of 8 == 2**3, */ 1.299 + /* and also undo the PASS1_BITS scaling. */ 1.300 + 1.301 + wsptr = workspace; 1.302 + for (ctr = 0; ctr < DCTSIZE; ctr++) { 1.303 + outptr = output_buf[ctr] + output_col; 1.304 + /* Rows of zeroes can be exploited in the same way as we did with columns. 1.305 + * However, the column calculation has created many nonzero AC terms, so 1.306 + * the simplification applies less often (typically 5% to 10% of the time). 1.307 + * On machines with very fast multiplication, it's possible that the 1.308 + * test takes more time than it's worth. In that case this section 1.309 + * may be commented out. 1.310 + */ 1.311 + 1.312 +#ifndef NO_ZERO_ROW_TEST 1.313 + if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && 1.314 + wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { 1.315 + /* AC terms all zero */ 1.316 + JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) 1.317 + & RANGE_MASK]; 1.318 + 1.319 + outptr[0] = dcval; 1.320 + outptr[1] = dcval; 1.321 + outptr[2] = dcval; 1.322 + outptr[3] = dcval; 1.323 + outptr[4] = dcval; 1.324 + outptr[5] = dcval; 1.325 + outptr[6] = dcval; 1.326 + outptr[7] = dcval; 1.327 + 1.328 + wsptr += DCTSIZE; /* advance pointer to next row */ 1.329 + continue; 1.330 + } 1.331 +#endif 1.332 + 1.333 + /* Even part: reverse the even part of the forward DCT. */ 1.334 + /* The rotator is sqrt(2)*c(-6). 
*/ 1.335 + 1.336 + z2 = (INT32) wsptr[2]; 1.337 + z3 = (INT32) wsptr[6]; 1.338 + 1.339 + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); 1.340 + tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); 1.341 + tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); 1.342 + 1.343 + tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS; 1.344 + tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS; 1.345 + 1.346 + tmp10 = tmp0 + tmp3; 1.347 + tmp13 = tmp0 - tmp3; 1.348 + tmp11 = tmp1 + tmp2; 1.349 + tmp12 = tmp1 - tmp2; 1.350 + 1.351 + /* Odd part per figure 8; the matrix is unitary and hence its 1.352 + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 1.353 + */ 1.354 + 1.355 + tmp0 = (INT32) wsptr[7]; 1.356 + tmp1 = (INT32) wsptr[5]; 1.357 + tmp2 = (INT32) wsptr[3]; 1.358 + tmp3 = (INT32) wsptr[1]; 1.359 + 1.360 + z1 = tmp0 + tmp3; 1.361 + z2 = tmp1 + tmp2; 1.362 + z3 = tmp0 + tmp2; 1.363 + z4 = tmp1 + tmp3; 1.364 + z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ 1.365 + 1.366 + tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ 1.367 + tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ 1.368 + tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ 1.369 + tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ 1.370 + z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ 1.371 + z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ 1.372 + z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ 1.373 + z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ 1.374 + 1.375 + z3 += z5; 1.376 + z4 += z5; 1.377 + 1.378 + tmp0 += z1 + z3; 1.379 + tmp1 += z2 + z4; 1.380 + tmp2 += z2 + z3; 1.381 + tmp3 += z1 + z4; 1.382 + 1.383 + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 1.384 + 1.385 + outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3, 1.386 + CONST_BITS+PASS1_BITS+3) 1.387 + & RANGE_MASK]; 1.388 + outptr[7] = range_limit[(int) DESCALE(tmp10 - 
tmp3, 1.389 + CONST_BITS+PASS1_BITS+3) 1.390 + & RANGE_MASK]; 1.391 + outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2, 1.392 + CONST_BITS+PASS1_BITS+3) 1.393 + & RANGE_MASK]; 1.394 + outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2, 1.395 + CONST_BITS+PASS1_BITS+3) 1.396 + & RANGE_MASK]; 1.397 + outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1, 1.398 + CONST_BITS+PASS1_BITS+3) 1.399 + & RANGE_MASK]; 1.400 + outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1, 1.401 + CONST_BITS+PASS1_BITS+3) 1.402 + & RANGE_MASK]; 1.403 + outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0, 1.404 + CONST_BITS+PASS1_BITS+3) 1.405 + & RANGE_MASK]; 1.406 + outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0, 1.407 + CONST_BITS+PASS1_BITS+3) 1.408 + & RANGE_MASK]; 1.409 + 1.410 + wsptr += DCTSIZE; /* advance pointer to next row */ 1.411 + } 1.412 +} 1.413 + 1.414 +#ifdef IDCT_SCALING_SUPPORTED 1.415 + 1.416 + 1.417 +/* 1.418 + * Perform dequantization and inverse DCT on one block of coefficients, 1.419 + * producing a 7x7 output block. 1.420 + * 1.421 + * Optimized algorithm with 12 multiplications in the 1-D kernel. 1.422 + * cK represents sqrt(2) * cos(K*pi/14). 1.423 + */ 1.424 + 1.425 +GLOBAL(void) 1.426 +jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.427 + JCOEFPTR coef_block, 1.428 + JSAMPARRAY output_buf, JDIMENSION output_col) 1.429 +{ 1.430 + INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13; 1.431 + INT32 z1, z2, z3; 1.432 + JCOEFPTR inptr; 1.433 + ISLOW_MULT_TYPE * quantptr; 1.434 + int * wsptr; 1.435 + JSAMPROW outptr; 1.436 + JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1.437 + int ctr; 1.438 + int workspace[7*7]; /* buffers data between passes */ 1.439 + SHIFT_TEMPS 1.440 + 1.441 + /* Pass 1: process columns from input, store into work array. 
*/ 1.442 + 1.443 + inptr = coef_block; 1.444 + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1.445 + wsptr = workspace; 1.446 + for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) { 1.447 + /* Even part */ 1.448 + 1.449 + tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1.450 + tmp13 <<= CONST_BITS; 1.451 + /* Add fudge factor here for final descale. */ 1.452 + tmp13 += ONE << (CONST_BITS-PASS1_BITS-1); 1.453 + 1.454 + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1.455 + z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1.456 + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1.457 + 1.458 + tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ 1.459 + tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ 1.460 + tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ 1.461 + tmp0 = z1 + z3; 1.462 + z2 -= tmp0; 1.463 + tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */ 1.464 + tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ 1.465 + tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ 1.466 + tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ 1.467 + 1.468 + /* Odd part */ 1.469 + 1.470 + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1.471 + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1.472 + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1.473 + 1.474 + tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ 1.475 + tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ 1.476 + tmp0 = tmp1 - tmp2; 1.477 + tmp1 += tmp2; 1.478 + tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ 1.479 + tmp1 += tmp2; 1.480 + z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ 1.481 + tmp0 += z2; 1.482 + tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ 1.483 + 1.484 + /* Final output stage */ 1.485 + 1.486 + wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 1.487 + wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - 
tmp0, CONST_BITS-PASS1_BITS); 1.488 + wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); 1.489 + wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); 1.490 + wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); 1.491 + wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); 1.492 + wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS); 1.493 + } 1.494 + 1.495 + /* Pass 2: process 7 rows from work array, store into output array. */ 1.496 + 1.497 + wsptr = workspace; 1.498 + for (ctr = 0; ctr < 7; ctr++) { 1.499 + outptr = output_buf[ctr] + output_col; 1.500 + 1.501 + /* Even part */ 1.502 + 1.503 + /* Add fudge factor here for final descale. */ 1.504 + tmp13 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 1.505 + tmp13 <<= CONST_BITS; 1.506 + 1.507 + z1 = (INT32) wsptr[2]; 1.508 + z2 = (INT32) wsptr[4]; 1.509 + z3 = (INT32) wsptr[6]; 1.510 + 1.511 + tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ 1.512 + tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ 1.513 + tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ 1.514 + tmp0 = z1 + z3; 1.515 + z2 -= tmp0; 1.516 + tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */ 1.517 + tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ 1.518 + tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ 1.519 + tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ 1.520 + 1.521 + /* Odd part */ 1.522 + 1.523 + z1 = (INT32) wsptr[1]; 1.524 + z2 = (INT32) wsptr[3]; 1.525 + z3 = (INT32) wsptr[5]; 1.526 + 1.527 + tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ 1.528 + tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ 1.529 + tmp0 = tmp1 - tmp2; 1.530 + tmp1 += tmp2; 1.531 + tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ 1.532 + tmp1 += tmp2; 1.533 + z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ 1.534 + tmp0 += z2; 1.535 + tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* 
c3+c1-c5 */ 1.536 + 1.537 + /* Final output stage */ 1.538 + 1.539 + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 1.540 + CONST_BITS+PASS1_BITS+3) 1.541 + & RANGE_MASK]; 1.542 + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 1.543 + CONST_BITS+PASS1_BITS+3) 1.544 + & RANGE_MASK]; 1.545 + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, 1.546 + CONST_BITS+PASS1_BITS+3) 1.547 + & RANGE_MASK]; 1.548 + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, 1.549 + CONST_BITS+PASS1_BITS+3) 1.550 + & RANGE_MASK]; 1.551 + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, 1.552 + CONST_BITS+PASS1_BITS+3) 1.553 + & RANGE_MASK]; 1.554 + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, 1.555 + CONST_BITS+PASS1_BITS+3) 1.556 + & RANGE_MASK]; 1.557 + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13, 1.558 + CONST_BITS+PASS1_BITS+3) 1.559 + & RANGE_MASK]; 1.560 + 1.561 + wsptr += 7; /* advance pointer to next row */ 1.562 + } 1.563 +} 1.564 + 1.565 + 1.566 +/* 1.567 + * Perform dequantization and inverse DCT on one block of coefficients, 1.568 + * producing a reduced-size 6x6 output block. 1.569 + * 1.570 + * Optimized algorithm with 3 multiplications in the 1-D kernel. 1.571 + * cK represents sqrt(2) * cos(K*pi/12). 1.572 + */ 1.573 + 1.574 +GLOBAL(void) 1.575 +jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.576 + JCOEFPTR coef_block, 1.577 + JSAMPARRAY output_buf, JDIMENSION output_col) 1.578 +{ 1.579 + INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; 1.580 + INT32 z1, z2, z3; 1.581 + JCOEFPTR inptr; 1.582 + ISLOW_MULT_TYPE * quantptr; 1.583 + int * wsptr; 1.584 + JSAMPROW outptr; 1.585 + JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1.586 + int ctr; 1.587 + int workspace[6*6]; /* buffers data between passes */ 1.588 + SHIFT_TEMPS 1.589 + 1.590 + /* Pass 1: process columns from input, store into work array. 
*/ 1.591 + 1.592 + inptr = coef_block; 1.593 + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1.594 + wsptr = workspace; 1.595 + for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { 1.596 + /* Even part */ 1.597 + 1.598 + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1.599 + tmp0 <<= CONST_BITS; 1.600 + /* Add fudge factor here for final descale. */ 1.601 + tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); 1.602 + tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1.603 + tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ 1.604 + tmp1 = tmp0 + tmp10; 1.605 + tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS); 1.606 + tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1.607 + tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ 1.608 + tmp10 = tmp1 + tmp0; 1.609 + tmp12 = tmp1 - tmp0; 1.610 + 1.611 + /* Odd part */ 1.612 + 1.613 + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1.614 + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1.615 + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1.616 + tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ 1.617 + tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); 1.618 + tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); 1.619 + tmp1 = (z1 - z2 - z3) << PASS1_BITS; 1.620 + 1.621 + /* Final output stage */ 1.622 + 1.623 + wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 1.624 + wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 1.625 + wsptr[6*1] = (int) (tmp11 + tmp1); 1.626 + wsptr[6*4] = (int) (tmp11 - tmp1); 1.627 + wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); 1.628 + wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); 1.629 + } 1.630 + 1.631 + /* Pass 2: process 6 rows from work array, store into output array. 
*/ 1.632 + 1.633 + wsptr = workspace; 1.634 + for (ctr = 0; ctr < 6; ctr++) { 1.635 + outptr = output_buf[ctr] + output_col; 1.636 + 1.637 + /* Even part */ 1.638 + 1.639 + /* Add fudge factor here for final descale. */ 1.640 + tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 1.641 + tmp0 <<= CONST_BITS; 1.642 + tmp2 = (INT32) wsptr[4]; 1.643 + tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ 1.644 + tmp1 = tmp0 + tmp10; 1.645 + tmp11 = tmp0 - tmp10 - tmp10; 1.646 + tmp10 = (INT32) wsptr[2]; 1.647 + tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ 1.648 + tmp10 = tmp1 + tmp0; 1.649 + tmp12 = tmp1 - tmp0; 1.650 + 1.651 + /* Odd part */ 1.652 + 1.653 + z1 = (INT32) wsptr[1]; 1.654 + z2 = (INT32) wsptr[3]; 1.655 + z3 = (INT32) wsptr[5]; 1.656 + tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ 1.657 + tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); 1.658 + tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); 1.659 + tmp1 = (z1 - z2 - z3) << CONST_BITS; 1.660 + 1.661 + /* Final output stage */ 1.662 + 1.663 + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 1.664 + CONST_BITS+PASS1_BITS+3) 1.665 + & RANGE_MASK]; 1.666 + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 1.667 + CONST_BITS+PASS1_BITS+3) 1.668 + & RANGE_MASK]; 1.669 + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, 1.670 + CONST_BITS+PASS1_BITS+3) 1.671 + & RANGE_MASK]; 1.672 + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, 1.673 + CONST_BITS+PASS1_BITS+3) 1.674 + & RANGE_MASK]; 1.675 + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, 1.676 + CONST_BITS+PASS1_BITS+3) 1.677 + & RANGE_MASK]; 1.678 + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, 1.679 + CONST_BITS+PASS1_BITS+3) 1.680 + & RANGE_MASK]; 1.681 + 1.682 + wsptr += 6; /* advance pointer to next row */ 1.683 + } 1.684 +} 1.685 + 1.686 + 1.687 +/* 1.688 + * Perform dequantization and inverse DCT on one block of coefficients, 1.689 + * producing a reduced-size 5x5 output block. 
1.690 + * 1.691 + * Optimized algorithm with 5 multiplications in the 1-D kernel. 1.692 + * cK represents sqrt(2) * cos(K*pi/10). 1.693 + */ 1.694 + 1.695 +GLOBAL(void) 1.696 +jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.697 + JCOEFPTR coef_block, 1.698 + JSAMPARRAY output_buf, JDIMENSION output_col) 1.699 +{ 1.700 + INT32 tmp0, tmp1, tmp10, tmp11, tmp12; 1.701 + INT32 z1, z2, z3; 1.702 + JCOEFPTR inptr; 1.703 + ISLOW_MULT_TYPE * quantptr; 1.704 + int * wsptr; 1.705 + JSAMPROW outptr; 1.706 + JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1.707 + int ctr; 1.708 + int workspace[5*5]; /* buffers data between passes */ 1.709 + SHIFT_TEMPS 1.710 + 1.711 + /* Pass 1: process columns from input, store into work array. */ 1.712 + 1.713 + inptr = coef_block; 1.714 + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1.715 + wsptr = workspace; 1.716 + for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) { 1.717 + /* Even part */ 1.718 + 1.719 + tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1.720 + tmp12 <<= CONST_BITS; 1.721 + /* Add fudge factor here for final descale. 
*/ 1.722 + tmp12 += ONE << (CONST_BITS-PASS1_BITS-1); 1.723 + tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1.724 + tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1.725 + z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ 1.726 + z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ 1.727 + z3 = tmp12 + z2; 1.728 + tmp10 = z3 + z1; 1.729 + tmp11 = z3 - z1; 1.730 + tmp12 -= z2 << 2; 1.731 + 1.732 + /* Odd part */ 1.733 + 1.734 + z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1.735 + z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1.736 + 1.737 + z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ 1.738 + tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ 1.739 + tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ 1.740 + 1.741 + /* Final output stage */ 1.742 + 1.743 + wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 1.744 + wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 1.745 + wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); 1.746 + wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); 1.747 + wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS); 1.748 + } 1.749 + 1.750 + /* Pass 2: process 5 rows from work array, store into output array. */ 1.751 + 1.752 + wsptr = workspace; 1.753 + for (ctr = 0; ctr < 5; ctr++) { 1.754 + outptr = output_buf[ctr] + output_col; 1.755 + 1.756 + /* Even part */ 1.757 + 1.758 + /* Add fudge factor here for final descale. 
*/ 1.759 + tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 1.760 + tmp12 <<= CONST_BITS; 1.761 + tmp0 = (INT32) wsptr[2]; 1.762 + tmp1 = (INT32) wsptr[4]; 1.763 + z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ 1.764 + z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ 1.765 + z3 = tmp12 + z2; 1.766 + tmp10 = z3 + z1; 1.767 + tmp11 = z3 - z1; 1.768 + tmp12 -= z2 << 2; 1.769 + 1.770 + /* Odd part */ 1.771 + 1.772 + z2 = (INT32) wsptr[1]; 1.773 + z3 = (INT32) wsptr[3]; 1.774 + 1.775 + z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ 1.776 + tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ 1.777 + tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ 1.778 + 1.779 + /* Final output stage */ 1.780 + 1.781 + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 1.782 + CONST_BITS+PASS1_BITS+3) 1.783 + & RANGE_MASK]; 1.784 + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 1.785 + CONST_BITS+PASS1_BITS+3) 1.786 + & RANGE_MASK]; 1.787 + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, 1.788 + CONST_BITS+PASS1_BITS+3) 1.789 + & RANGE_MASK]; 1.790 + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, 1.791 + CONST_BITS+PASS1_BITS+3) 1.792 + & RANGE_MASK]; 1.793 + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12, 1.794 + CONST_BITS+PASS1_BITS+3) 1.795 + & RANGE_MASK]; 1.796 + 1.797 + wsptr += 5; /* advance pointer to next row */ 1.798 + } 1.799 +} 1.800 + 1.801 + 1.802 +/* 1.803 + * Perform dequantization and inverse DCT on one block of coefficients, 1.804 + * producing a reduced-size 3x3 output block. 1.805 + * 1.806 + * Optimized algorithm with 2 multiplications in the 1-D kernel. 1.807 + * cK represents sqrt(2) * cos(K*pi/6). 
1.808 + */ 1.809 + /* Reads coef_block entries [DCTSIZE*r + c] for r, c in 0..2, passes each through DEQUANTIZE with the matching compptr->dct_table entry, and writes 3 samples into each of output_buf[0..2] starting at column output_col. cinfo is only used to fetch the range_limit table. */ 1.810 +GLOBAL(void) 1.811 +jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.812 + JCOEFPTR coef_block, 1.813 + JSAMPARRAY output_buf, JDIMENSION output_col) 1.814 +{ 1.815 + INT32 tmp0, tmp2, tmp10, tmp12; 1.816 + JCOEFPTR inptr; 1.817 + ISLOW_MULT_TYPE * quantptr; 1.818 + int * wsptr; 1.819 + JSAMPROW outptr; 1.820 + JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1.821 + int ctr; 1.822 + int workspace[3*3]; /* buffers data between passes */ 1.823 + SHIFT_TEMPS 1.824 + 1.825 + /* Pass 1: process columns from input, store into work array. */ 1.826 + /* One column per iteration: inptr, quantptr and wsptr each advance by one element. */ 1.827 + inptr = coef_block; 1.828 + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1.829 + wsptr = workspace; 1.830 + for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) { 1.831 + /* Even part */ 1.832 + 1.833 + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1.834 + tmp0 <<= CONST_BITS; /* NOTE(review): left shift of a signed value; ISO C leaves this undefined when the operand is negative -- presumably the dequantized term can be negative, confirm the supported compilers shift arithmetically */ 1.835 + /* Add fudge factor here for final descale. */ 1.836 + tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); /* half of the 2^(CONST_BITS-PASS1_BITS) divisor applied by the RIGHT_SHIFT calls below, presumably so the descale rounds to nearest */ 1.837 + tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1.838 + tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ 1.839 + tmp10 = tmp0 + tmp12; 1.840 + tmp2 = tmp0 - tmp12 - tmp12; /* tmp0 - 2*tmp12 */ 1.841 + 1.842 + /* Odd part */ 1.843 + 1.844 + tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1.845 + tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ /* tmp0 is reused for the odd term; the even-part sums are already held in tmp10 and tmp2 */ 1.846 + 1.847 + /* Final output stage */ 1.848 + 1.849 + wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 1.850 + wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 1.851 + wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS); /* the three results of this column land at wsptr[3*row], row = 0..2 */ 1.852 + } 1.853 + 1.854 + /* Pass 2: process 3 rows from work array, store into output array. */ 1.855 + 1.856 + wsptr = workspace; 1.857 + for (ctr = 0; ctr < 3; ctr++) { 1.858 + outptr = output_buf[ctr] + output_col; 1.859 + 1.860 + /* Even part */ 1.861 + 1.862 + /* Add fudge factor here for final descale. 
*/ 1.863 + tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 1.864 + tmp0 <<= CONST_BITS; /* the added term is now 2^(CONST_BITS+PASS1_BITS+2), i.e. half of the 2^(CONST_BITS+PASS1_BITS+3) divisor of the final RIGHT_SHIFT */ 1.865 + tmp2 = (INT32) wsptr[2]; 1.866 + tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ 1.867 + tmp10 = tmp0 + tmp12; 1.868 + tmp2 = tmp0 - tmp12 - tmp12; 1.869 + 1.870 + /* Odd part */ 1.871 + 1.872 + tmp12 = (INT32) wsptr[1]; 1.873 + tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ 1.874 + 1.875 + /* Final output stage */ 1.876 + /* Each descaled value is masked with RANGE_MASK before it indexes range_limit[]. */ 1.877 + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 1.878 + CONST_BITS+PASS1_BITS+3) 1.879 + & RANGE_MASK]; 1.880 + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 1.881 + CONST_BITS+PASS1_BITS+3) 1.882 + & RANGE_MASK]; 1.883 + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2, 1.884 + CONST_BITS+PASS1_BITS+3) 1.885 + & RANGE_MASK]; 1.886 + 1.887 + wsptr += 3; /* advance pointer to next row */ 1.888 + } 1.889 +} 1.890 + 1.891 + 1.892 +/* 1.893 + * Perform dequantization and inverse DCT on one block of coefficients, 1.894 + * producing a 9x9 output block. 1.895 + * 1.896 + * Optimized algorithm with 10 multiplications in the 1-D kernel. 1.897 + * cK represents sqrt(2) * cos(K*pi/18). Pass 1 turns each of the 8 input columns into 9 workspace values; pass 2 turns each 8-value workspace row into 9 output samples. 1.898 + */ 1.899 + 1.900 +GLOBAL(void) 1.901 +jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.902 + JCOEFPTR coef_block, 1.903 + JSAMPARRAY output_buf, JDIMENSION output_col) 1.904 +{ 1.905 + INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14; 1.906 + INT32 z1, z2, z3, z4; 1.907 + JCOEFPTR inptr; 1.908 + ISLOW_MULT_TYPE * quantptr; 1.909 + int * wsptr; 1.910 + JSAMPROW outptr; 1.911 + JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1.912 + int ctr; 1.913 + int workspace[8*9]; /* buffers data between passes */ /* 9 rows of 8 ints: pass 1 writes wsptr[8*row], pass 2 steps wsptr by 8 */ 1.914 + SHIFT_TEMPS 1.915 + 1.916 + /* Pass 1: process columns from input, store into work array. 
*/ 1.917 + 1.918 + inptr = coef_block; 1.919 + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1.920 + wsptr = workspace; 1.921 + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 1.922 + /* Even part */ 1.923 + 1.924 + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1.925 + tmp0 <<= CONST_BITS; /* NOTE(review): signed left shift; undefined in ISO C if tmp0 is negative -- confirm this is acceptable on the supported compilers */ 1.926 + /* Add fudge factor here for final descale. */ 1.927 + tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); 1.928 + 1.929 + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1.930 + z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1.931 + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1.932 + 1.933 + tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ 1.934 + tmp1 = tmp0 + tmp3; 1.935 + tmp2 = tmp0 - tmp3 - tmp3; 1.936 + /* tmp0 is recycled as scratch from here on; the rounded DC term lives on in tmp1 and tmp2. */ 1.937 + tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */ 1.938 + tmp11 = tmp2 + tmp0; 1.939 + tmp14 = tmp2 - tmp0 - tmp0; 1.940 + 1.941 + tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */ 1.942 + tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */ 1.943 + tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */ 1.944 + 1.945 + tmp10 = tmp1 + tmp0 - tmp3; 1.946 + tmp12 = tmp1 - tmp0 + tmp2; 1.947 + tmp13 = tmp1 - tmp2 + tmp3; 1.948 + 1.949 + /* Odd part */ 1.950 + 1.951 + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1.952 + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1.953 + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1.954 + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1.955 + 1.956 + z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ /* z2 now carries the negated product, so "- z2" below adds the c3 term and "+ z2" subtracts it */ 1.957 + 1.958 + tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ 1.959 + tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ 1.960 + tmp0 = tmp2 + tmp3 - z2; 1.961 + tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */ 1.962 + tmp2 += z2 - tmp1; 1.963 + tmp3 += z2 + tmp1; 1.964 + tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */ /* tmp1 was only a scratch product for the two updates above; this is its final odd-part value */ 1.965 + 1.966 + /* Final output stage */ 1.967 + 1.968 + wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, 
CONST_BITS-PASS1_BITS); 1.969 + wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 1.970 + wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); 1.971 + wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); 1.972 + wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); 1.973 + wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); 1.974 + wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS); 1.975 + wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS); 1.976 + wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS); /* workspace row 4 takes the even part only */ 1.977 + } 1.978 + 1.979 + /* Pass 2: process 9 rows from work array, store into output array. */ 1.980 + 1.981 + wsptr = workspace; 1.982 + for (ctr = 0; ctr < 9; ctr++) { 1.983 + outptr = output_buf[ctr] + output_col; 1.984 + 1.985 + /* Even part */ 1.986 + 1.987 + /* Add fudge factor here for final descale. */ 1.988 + tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 1.989 + tmp0 <<= CONST_BITS; 1.990 + 1.991 + z1 = (INT32) wsptr[2]; 1.992 + z2 = (INT32) wsptr[4]; 1.993 + z3 = (INT32) wsptr[6]; 1.994 + 1.995 + tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ 1.996 + tmp1 = tmp0 + tmp3; 1.997 + tmp2 = tmp0 - tmp3 - tmp3; 1.998 + 1.999 + tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */ 1.1000 + tmp11 = tmp2 + tmp0; 1.1001 + tmp14 = tmp2 - tmp0 - tmp0; 1.1002 + 1.1003 + tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */ 1.1004 + tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */ 1.1005 + tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */ 1.1006 + 1.1007 + tmp10 = tmp1 + tmp0 - tmp3; 1.1008 + tmp12 = tmp1 - tmp0 + tmp2; 1.1009 + tmp13 = tmp1 - tmp2 + tmp3; 1.1010 + 1.1011 + /* Odd part */ 1.1012 + /* Same arithmetic as the pass 1 odd part, fed from workspace entries 1, 3, 5, 7 of the current row. */ 1.1013 + z1 = (INT32) wsptr[1]; 1.1014 + z2 = (INT32) wsptr[3]; 1.1015 + z3 = (INT32) wsptr[5]; 1.1016 + z4 = (INT32) wsptr[7]; 1.1017 + 1.1018 + z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ 1.1019 + 1.1020 + tmp2 = MULTIPLY(z1 + z3, 
FIX(0.909038955)); /* c5 */ 1.1021 + tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ 1.1022 + tmp0 = tmp2 + tmp3 - z2; 1.1023 + tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */ 1.1024 + tmp2 += z2 - tmp1; 1.1025 + tmp3 += z2 + tmp1; 1.1026 + tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */ 1.1027 + 1.1028 + /* Final output stage */ 1.1029 + /* Samples are written in mirrored pairs (k, 8-k) as sum and difference of an even and an odd term; sample 4 uses tmp14 alone. */ 1.1030 + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 1.1031 + CONST_BITS+PASS1_BITS+3) 1.1032 + & RANGE_MASK]; 1.1033 + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 1.1034 + CONST_BITS+PASS1_BITS+3) 1.1035 + & RANGE_MASK]; 1.1036 + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, 1.1037 + CONST_BITS+PASS1_BITS+3) 1.1038 + & RANGE_MASK]; 1.1039 + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, 1.1040 + CONST_BITS+PASS1_BITS+3) 1.1041 + & RANGE_MASK]; 1.1042 + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, 1.1043 + CONST_BITS+PASS1_BITS+3) 1.1044 + & RANGE_MASK]; 1.1045 + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, 1.1046 + CONST_BITS+PASS1_BITS+3) 1.1047 + & RANGE_MASK]; 1.1048 + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3, 1.1049 + CONST_BITS+PASS1_BITS+3) 1.1050 + & RANGE_MASK]; 1.1051 + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3, 1.1052 + CONST_BITS+PASS1_BITS+3) 1.1053 + & RANGE_MASK]; 1.1054 + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14, 1.1055 + CONST_BITS+PASS1_BITS+3) 1.1056 + & RANGE_MASK]; 1.1057 + 1.1058 + wsptr += 8; /* advance pointer to next row */ 1.1059 + } 1.1060 +} 1.1061 + 1.1062 + 1.1063 +/* 1.1064 + * Perform dequantization and inverse DCT on one block of coefficients, 1.1065 + * producing a 10x10 output block. 1.1066 + * 1.1067 + * Optimized algorithm with 12 multiplications in the 1-D kernel. 1.1068 + * cK represents sqrt(2) * cos(K*pi/20). 
1.1069 + */ 1.1070 + /* Reads rows 0..7 of all 8 columns of coef_block (inptr[DCTSIZE*0..7] over 8 iterations), passes each through DEQUANTIZE with the matching compptr->dct_table entry, and writes 10 samples into each of output_buf[0..9] starting at column output_col. */ 1.1071 +GLOBAL(void) 1.1072 +jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.1073 + JCOEFPTR coef_block, 1.1074 + JSAMPARRAY output_buf, JDIMENSION output_col) 1.1075 +{ 1.1076 + INT32 tmp10, tmp11, tmp12, tmp13, tmp14; 1.1077 + INT32 tmp20, tmp21, tmp22, tmp23, tmp24; 1.1078 + INT32 z1, z2, z3, z4, z5; 1.1079 + JCOEFPTR inptr; 1.1080 + ISLOW_MULT_TYPE * quantptr; 1.1081 + int * wsptr; 1.1082 + JSAMPROW outptr; 1.1083 + JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1.1084 + int ctr; 1.1085 + int workspace[8*10]; /* buffers data between passes */ /* 10 rows of 8 ints: pass 1 writes wsptr[8*row], pass 2 steps wsptr by 8 */ 1.1086 + SHIFT_TEMPS 1.1087 + 1.1088 + /* Pass 1: process columns from input, store into work array. */ 1.1089 + 1.1090 + inptr = coef_block; 1.1091 + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1.1092 + wsptr = workspace; 1.1093 + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 1.1094 + /* Even part */ 1.1095 + 1.1096 + z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1.1097 + z3 <<= CONST_BITS; /* NOTE(review): signed left shift; undefined in ISO C if z3 is negative -- confirm this is acceptable on the supported compilers */ 1.1098 + /* Add fudge factor here for final descale. 
*/ 1.1099 + z3 += ONE << (CONST_BITS-PASS1_BITS-1); 1.1100 + z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1.1101 + z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ 1.1102 + z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ 1.1103 + tmp10 = z3 + z1; 1.1104 + tmp11 = z3 - z2; 1.1105 + 1.1106 + tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */ 1.1107 + CONST_BITS-PASS1_BITS); /* descaled immediately: this term is stored without a RIGHT_SHIFT at the output stage of this pass */ 1.1108 + 1.1109 + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1.1110 + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1.1111 + 1.1112 + z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ 1.1113 + tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ 1.1114 + tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ 1.1115 + 1.1116 + tmp20 = tmp10 + tmp12; 1.1117 + tmp24 = tmp10 - tmp12; 1.1118 + tmp21 = tmp11 + tmp13; 1.1119 + tmp23 = tmp11 - tmp13; 1.1120 + /* The even part is now fully captured in tmp20..tmp24; tmp10..tmp13 are reused by the odd part. */ 1.1121 + /* Odd part */ 1.1122 + 1.1123 + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1.1124 + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1.1125 + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1.1126 + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1.1127 + 1.1128 + tmp11 = z2 + z4; 1.1129 + tmp13 = z2 - z4; 1.1130 + 1.1131 + tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ 1.1132 + z5 = z3 << CONST_BITS; /* z3 brought to the scale of the MULTIPLY products -- presumably FIX(1.0) equals 1 << CONST_BITS */ 1.1133 + 1.1134 + z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ 1.1135 + z4 = z5 + tmp12; 1.1136 + 1.1137 + tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ 1.1138 + tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ 1.1139 + 1.1140 + z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ 1.1141 + z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1)); 1.1142 + 1.1143 + tmp12 = (z1 - tmp13 - z3) << PASS1_BITS; /* scaled by PASS1_BITS only, so it pairs with the already-descaled tmp22 */ 1.1144 + 1.1145 + tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ 1.1146 + tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ 1.1147 + 1.1148 + /* Final output stage */ 1.1149 + 1.1150 + wsptr[8*0] = 
(int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 1.1151 + wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 1.1152 + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 1.1153 + wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 1.1154 + wsptr[8*2] = (int) (tmp22 + tmp12); 1.1155 + wsptr[8*7] = (int) (tmp22 - tmp12); /* rows 2 and 7 skip the RIGHT_SHIFT because both operands were scaled to workspace precision when they were computed */ 1.1156 + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 1.1157 + wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 1.1158 + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 1.1159 + wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 1.1160 + } 1.1161 + 1.1162 + /* Pass 2: process 10 rows from work array, store into output array. */ 1.1163 + 1.1164 + wsptr = workspace; 1.1165 + for (ctr = 0; ctr < 10; ctr++) { 1.1166 + outptr = output_buf[ctr] + output_col; 1.1167 + 1.1168 + /* Even part */ 1.1169 + 1.1170 + /* Add fudge factor here for final descale. 
*/ 1.1171 + z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 1.1172 + z3 <<= CONST_BITS; 1.1173 + z4 = (INT32) wsptr[4]; 1.1174 + z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ 1.1175 + z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ 1.1176 + tmp10 = z3 + z1; 1.1177 + tmp11 = z3 - z2; 1.1178 + 1.1179 + tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */ /* unlike pass 1, not descaled here; the common RIGHT_SHIFT at the output stage handles it */ 1.1180 + 1.1181 + z2 = (INT32) wsptr[2]; 1.1182 + z3 = (INT32) wsptr[6]; 1.1183 + 1.1184 + z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ 1.1185 + tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ 1.1186 + tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ 1.1187 + 1.1188 + tmp20 = tmp10 + tmp12; 1.1189 + tmp24 = tmp10 - tmp12; 1.1190 + tmp21 = tmp11 + tmp13; 1.1191 + tmp23 = tmp11 - tmp13; 1.1192 + 1.1193 + /* Odd part */ 1.1194 + 1.1195 + z1 = (INT32) wsptr[1]; 1.1196 + z2 = (INT32) wsptr[3]; 1.1197 + z3 = (INT32) wsptr[5]; 1.1198 + z3 <<= CONST_BITS; /* shifted in place; plays the role that z5 has in pass 1 */ 1.1199 + z4 = (INT32) wsptr[7]; 1.1200 + 1.1201 + tmp11 = z2 + z4; 1.1202 + tmp13 = z2 - z4; 1.1203 + 1.1204 + tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ 1.1205 + 1.1206 + z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ 1.1207 + z4 = z3 + tmp12; 1.1208 + 1.1209 + tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ 1.1210 + tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ 1.1211 + 1.1212 + z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ 1.1213 + z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1)); 1.1214 + 1.1215 + tmp12 = ((z1 - tmp13) << CONST_BITS) - z3; /* z3 already carries the CONST_BITS shift, so this is (z1 - tmp13 - wsptr[5]) at CONST_BITS scale */ 1.1216 + 1.1217 + tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ 1.1218 + tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ 1.1219 + 1.1220 + /* Final output stage */ 1.1221 + 1.1222 + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 1.1223 + CONST_BITS+PASS1_BITS+3) 1.1224 + & RANGE_MASK]; 1.1225 + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 1.1226 + CONST_BITS+PASS1_BITS+3) 1.1227 + & RANGE_MASK]; 1.1228 + outptr[1] 
= range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 1.1229 + CONST_BITS+PASS1_BITS+3) 1.1230 + & RANGE_MASK]; 1.1231 + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 1.1232 + CONST_BITS+PASS1_BITS+3) 1.1233 + & RANGE_MASK]; 1.1234 + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 1.1235 + CONST_BITS+PASS1_BITS+3) 1.1236 + & RANGE_MASK]; 1.1237 + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 1.1238 + CONST_BITS+PASS1_BITS+3) 1.1239 + & RANGE_MASK]; /* in this pass samples 2 and 7 take the same RIGHT_SHIFT as every other sample */ 1.1240 + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 1.1241 + CONST_BITS+PASS1_BITS+3) 1.1242 + & RANGE_MASK]; 1.1243 + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 1.1244 + CONST_BITS+PASS1_BITS+3) 1.1245 + & RANGE_MASK]; 1.1246 + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 1.1247 + CONST_BITS+PASS1_BITS+3) 1.1248 + & RANGE_MASK]; 1.1249 + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 1.1250 + CONST_BITS+PASS1_BITS+3) 1.1251 + & RANGE_MASK]; 1.1252 + 1.1253 + wsptr += 8; /* advance pointer to next row */ 1.1254 + } 1.1255 +} 1.1256 + 1.1257 + 1.1258 +/* 1.1259 + * Perform dequantization and inverse DCT on one block of coefficients, 1.1260 + * producing a 11x11 output block. 1.1261 + * 1.1262 + * Optimized algorithm with 24 multiplications in the 1-D kernel. 1.1263 + * cK represents sqrt(2) * cos(K*pi/22). 
1.1264 + */ 1.1265 + /* Reads rows 0..7 of all 8 columns of coef_block (inptr[DCTSIZE*0..7] over 8 iterations), passes each through DEQUANTIZE with the matching compptr->dct_table entry, and writes 11 samples into each of output_buf[0..10] starting at column output_col. */ 1.1266 +GLOBAL(void) 1.1267 +jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.1268 + JCOEFPTR coef_block, 1.1269 + JSAMPARRAY output_buf, JDIMENSION output_col) 1.1270 +{ 1.1271 + INT32 tmp10, tmp11, tmp12, tmp13, tmp14; 1.1272 + INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; 1.1273 + INT32 z1, z2, z3, z4; 1.1274 + JCOEFPTR inptr; 1.1275 + ISLOW_MULT_TYPE * quantptr; 1.1276 + int * wsptr; 1.1277 + JSAMPROW outptr; 1.1278 + JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1.1279 + int ctr; 1.1280 + int workspace[8*11]; /* buffers data between passes */ /* 11 rows of 8 ints: pass 1 writes wsptr[8*row], pass 2 steps wsptr by 8 */ 1.1281 + SHIFT_TEMPS 1.1282 + 1.1283 + /* Pass 1: process columns from input, store into work array. */ 1.1284 + 1.1285 + inptr = coef_block; 1.1286 + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1.1287 + wsptr = workspace; 1.1288 + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 1.1289 + /* Even part */ 1.1290 + 1.1291 + tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1.1292 + tmp10 <<= CONST_BITS; /* NOTE(review): signed left shift; undefined in ISO C if tmp10 is negative -- confirm this is acceptable on the supported compilers */ 1.1293 + /* Add fudge factor here for final descale. 
*/ 1.1294 + tmp10 += ONE << (CONST_BITS-PASS1_BITS-1); 1.1295 + 1.1296 + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1.1297 + z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1.1298 + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1.1299 + 1.1300 + tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ 1.1301 + tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ 1.1302 + z4 = z1 + z3; 1.1303 + tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ 1.1304 + z4 -= z2; /* z4 is now z1 + z3 - z2 */ 1.1305 + tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ 1.1306 + tmp21 = tmp20 + tmp23 + tmp25 - 1.1307 + MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */ 1.1308 + tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */ 1.1309 + tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */ 1.1310 + tmp24 += tmp25; 1.1311 + tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */ /* order matters: tmp22 is taken from tmp24 before tmp24 receives its own z2/z1 terms on the next statement */ 1.1312 + tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */ 1.1313 + MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */ 1.1314 + tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */ /* tmp25 is redefined as the centre term; its earlier value was only an intermediate shared by tmp20..tmp24 */ 1.1315 + /* tmp10 (the rounded DC term) is not read again after this point and is reused in the odd part. */ 1.1316 + /* Odd part */ 1.1317 + 1.1318 + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1.1319 + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1.1320 + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1.1321 + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1.1322 + 1.1323 + tmp11 = z1 + z2; 1.1324 + tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ 1.1325 + tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */ 1.1326 + tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */ 1.1327 + tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */ 1.1328 + tmp10 = tmp11 + tmp12 + tmp13 - 1.1329 + MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */ /* last read of z1 as the row-1 input; it is scratch from the next statement on */ 1.1330 + z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ 1.1331 + tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ 1.1332 + tmp12 += z1 - MULTIPLY(z3, 
FIX(1.192193623)); /* c3+c5-c7-c9 */ 1.1333 + z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ 1.1334 + tmp11 += z1; 1.1335 + tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ 1.1336 + tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ 1.1337 + MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ 1.1338 + MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ 1.1339 + 1.1340 + /* Final output stage */ 1.1341 + 1.1342 + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 1.1343 + wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 1.1344 + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 1.1345 + wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 1.1346 + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 1.1347 + wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 1.1348 + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 1.1349 + wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 1.1350 + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 1.1351 + wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 1.1352 + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS); /* workspace row 5 takes the even part only */ 1.1353 + } 1.1354 + 1.1355 + /* Pass 2: process 11 rows from work array, store into output array. */ 1.1356 + 1.1357 + wsptr = workspace; 1.1358 + for (ctr = 0; ctr < 11; ctr++) { 1.1359 + outptr = output_buf[ctr] + output_col; 1.1360 + 1.1361 + /* Even part */ 1.1362 + 1.1363 + /* Add fudge factor here for final descale. 
*/ 1.1364 + tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 1.1365 + tmp10 <<= CONST_BITS; 1.1366 + 1.1367 + z1 = (INT32) wsptr[2]; 1.1368 + z2 = (INT32) wsptr[4]; 1.1369 + z3 = (INT32) wsptr[6]; 1.1370 + /* From here the arithmetic repeats the pass 1 even part, fed from the workspace row instead of dequantized coefficients. */ 1.1371 + tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ 1.1372 + tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ 1.1373 + z4 = z1 + z3; 1.1374 + tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ 1.1375 + z4 -= z2; 1.1376 + tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ 1.1377 + tmp21 = tmp20 + tmp23 + tmp25 - 1.1378 + MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */ 1.1379 + tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */ 1.1380 + tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */ 1.1381 + tmp24 += tmp25; 1.1382 + tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */ 1.1383 + tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */ 1.1384 + MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */ 1.1385 + tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */ 1.1386 + 1.1387 + /* Odd part */ 1.1388 + 1.1389 + z1 = (INT32) wsptr[1]; 1.1390 + z2 = (INT32) wsptr[3]; 1.1391 + z3 = (INT32) wsptr[5]; 1.1392 + z4 = (INT32) wsptr[7]; 1.1393 + 1.1394 + tmp11 = z1 + z2; 1.1395 + tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ 1.1396 + tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */ 1.1397 + tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */ 1.1398 + tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */ 1.1399 + tmp10 = tmp11 + tmp12 + tmp13 - 1.1400 + MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */ 1.1401 + z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ 1.1402 + tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ 1.1403 + tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ 1.1404 + z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ 1.1405 + tmp11 += z1; 1.1406 + tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 
*/ 1.1407 + tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ 1.1408 + MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ 1.1409 + MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ 1.1410 + 1.1411 + /* Final output stage */ 1.1412 + /* Samples are written in mirrored pairs (k, 10-k) as sum and difference of an even and an odd term; sample 5 uses tmp25 alone. */ 1.1413 + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 1.1414 + CONST_BITS+PASS1_BITS+3) 1.1415 + & RANGE_MASK]; 1.1416 + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 1.1417 + CONST_BITS+PASS1_BITS+3) 1.1418 + & RANGE_MASK]; 1.1419 + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 1.1420 + CONST_BITS+PASS1_BITS+3) 1.1421 + & RANGE_MASK]; 1.1422 + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 1.1423 + CONST_BITS+PASS1_BITS+3) 1.1424 + & RANGE_MASK]; 1.1425 + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 1.1426 + CONST_BITS+PASS1_BITS+3) 1.1427 + & RANGE_MASK]; 1.1428 + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 1.1429 + CONST_BITS+PASS1_BITS+3) 1.1430 + & RANGE_MASK]; 1.1431 + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 1.1432 + CONST_BITS+PASS1_BITS+3) 1.1433 + & RANGE_MASK]; 1.1434 + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 1.1435 + CONST_BITS+PASS1_BITS+3) 1.1436 + & RANGE_MASK]; 1.1437 + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 1.1438 + CONST_BITS+PASS1_BITS+3) 1.1439 + & RANGE_MASK]; 1.1440 + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 1.1441 + CONST_BITS+PASS1_BITS+3) 1.1442 + & RANGE_MASK]; 1.1443 + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25, 1.1444 + CONST_BITS+PASS1_BITS+3) 1.1445 + & RANGE_MASK]; 1.1446 + 1.1447 + wsptr += 8; /* advance pointer to next row */ 1.1448 + } 1.1449 +} 1.1450 + 1.1451 + 1.1452 +/* 1.1453 + * Perform dequantization and inverse DCT on one block of coefficients, 1.1454 + * producing a 12x12 output block. 1.1455 + * 1.1456 + * Optimized algorithm with 15 multiplications in the 1-D kernel. 1.1457 + * cK represents sqrt(2) * cos(K*pi/24). 
1.1458 + */ 1.1459 + /* Reads rows 0..7 of all 8 columns of coef_block (inptr[DCTSIZE*0..7] over 8 iterations), passes each through DEQUANTIZE with the matching compptr->dct_table entry, and writes 12 samples into each of output_buf[0..11] starting at column output_col. */ 1.1460 +GLOBAL(void) 1.1461 +jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.1462 + JCOEFPTR coef_block, 1.1463 + JSAMPARRAY output_buf, JDIMENSION output_col) 1.1464 +{ 1.1465 + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; 1.1466 + INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; 1.1467 + INT32 z1, z2, z3, z4; 1.1468 + JCOEFPTR inptr; 1.1469 + ISLOW_MULT_TYPE * quantptr; 1.1470 + int * wsptr; 1.1471 + JSAMPROW outptr; 1.1472 + JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1.1473 + int ctr; 1.1474 + int workspace[8*12]; /* buffers data between passes */ /* 12 rows of 8 ints: pass 1 writes wsptr[8*row], pass 2 steps wsptr by 8 */ 1.1475 + SHIFT_TEMPS 1.1476 + 1.1477 + /* Pass 1: process columns from input, store into work array. */ 1.1478 + 1.1479 + inptr = coef_block; 1.1480 + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1.1481 + wsptr = workspace; 1.1482 + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 1.1483 + /* Even part */ 1.1484 + 1.1485 + z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1.1486 + z3 <<= CONST_BITS; /* NOTE(review): signed left shift; undefined in ISO C if z3 is negative -- confirm this is acceptable on the supported compilers */ 1.1487 + /* Add fudge factor here for final descale. 
*/ 1.1488 + z3 += ONE << (CONST_BITS-PASS1_BITS-1); 1.1489 + 1.1490 + z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1.1491 + z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ 1.1492 + 1.1493 + tmp10 = z3 + z4; 1.1494 + tmp11 = z3 - z4; 1.1495 + 1.1496 + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1.1497 + z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ 1.1498 + z1 <<= CONST_BITS; /* z4 already holds the c2 product of the row-2 input; z1 becomes the same input shifted up by CONST_BITS */ 1.1499 + z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1.1500 + z2 <<= CONST_BITS; 1.1501 + /* tmp12 is scratch: it is overwritten before each of the three pairs (tmp21/tmp24, tmp20/tmp25, tmp22/tmp23) is formed. */ 1.1502 + tmp12 = z1 - z2; 1.1503 + 1.1504 + tmp21 = z3 + tmp12; 1.1505 + tmp24 = z3 - tmp12; 1.1506 + 1.1507 + tmp12 = z4 + z2; 1.1508 + 1.1509 + tmp20 = tmp10 + tmp12; 1.1510 + tmp25 = tmp10 - tmp12; 1.1511 + 1.1512 + tmp12 = z4 - z1 - z2; 1.1513 + 1.1514 + tmp22 = tmp11 + tmp12; 1.1515 + tmp23 = tmp11 - tmp12; 1.1516 + 1.1517 + /* Odd part */ 1.1518 + 1.1519 + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1.1520 + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1.1521 + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1.1522 + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1.1523 + 1.1524 + tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ 1.1525 + tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ /* FIX_0_541196100 is a named constant rather than a FIX() expression; it is not defined in this function -- presumably earlier in the file */ 1.1526 + 1.1527 + tmp10 = z1 + z3; 1.1528 + tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ 1.1529 + tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ 1.1530 + tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ 1.1531 + tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ 1.1532 + tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ 1.1533 + tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ 1.1534 + tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ 1.1535 + MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ 1.1536 + /* tmp11 and tmp14 are recomputed below for workspace rows 1/10 and 4/7; their values so far were partial terms consumed by tmp10, tmp12, tmp13 and tmp15. */ 1.1537 + z1 -= z4; 1.1538 + z2 -= z3; 1.1539 + z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ 1.1540 + tmp11 = z3 + MULTIPLY(z1, 
FIX_0_765366865); /* c3-c9 */ 1.1541 + tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ 1.1542 + 1.1543 + /* Final output stage */ 1.1544 + 1.1545 + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 1.1546 + wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 1.1547 + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 1.1548 + wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 1.1549 + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 1.1550 + wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 1.1551 + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 1.1552 + wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 1.1553 + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 1.1554 + wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 1.1555 + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); 1.1556 + wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); /* with an even output count every workspace row is a sum or difference of an even and an odd term */ 1.1557 + } 1.1558 + 1.1559 + /* Pass 2: process 12 rows from work array, store into output array. */ 1.1560 + 1.1561 + wsptr = workspace; 1.1562 + for (ctr = 0; ctr < 12; ctr++) { 1.1563 + outptr = output_buf[ctr] + output_col; 1.1564 + 1.1565 + /* Even part */ 1.1566 + 1.1567 + /* Add fudge factor here for final descale. 
*/ 1.1568 + z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 1.1569 + z3 <<= CONST_BITS; 1.1570 + 1.1571 + z4 = (INT32) wsptr[4]; 1.1572 + z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ 1.1573 + 1.1574 + tmp10 = z3 + z4; 1.1575 + tmp11 = z3 - z4; 1.1576 + 1.1577 + z1 = (INT32) wsptr[2]; 1.1578 + z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ 1.1579 + z1 <<= CONST_BITS; 1.1580 + z2 = (INT32) wsptr[6]; 1.1581 + z2 <<= CONST_BITS; 1.1582 + 1.1583 + tmp12 = z1 - z2; 1.1584 + 1.1585 + tmp21 = z3 + tmp12; 1.1586 + tmp24 = z3 - tmp12; 1.1587 + 1.1588 + tmp12 = z4 + z2; 1.1589 + 1.1590 + tmp20 = tmp10 + tmp12; 1.1591 + tmp25 = tmp10 - tmp12; 1.1592 + 1.1593 + tmp12 = z4 - z1 - z2; 1.1594 + 1.1595 + tmp22 = tmp11 + tmp12; 1.1596 + tmp23 = tmp11 - tmp12; 1.1597 + 1.1598 + /* Odd part */ 1.1599 + /* Same arithmetic as the pass 1 odd part, fed from workspace entries 1, 3, 5, 7 of the current row. */ 1.1600 + z1 = (INT32) wsptr[1]; 1.1601 + z2 = (INT32) wsptr[3]; 1.1602 + z3 = (INT32) wsptr[5]; 1.1603 + z4 = (INT32) wsptr[7]; 1.1604 + 1.1605 + tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ 1.1606 + tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ 1.1607 + 1.1608 + tmp10 = z1 + z3; 1.1609 + tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ 1.1610 + tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ 1.1611 + tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ 1.1612 + tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ 1.1613 + tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ 1.1614 + tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ 1.1615 + tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ 1.1616 + MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ 1.1617 + 1.1618 + z1 -= z4; 1.1619 + z2 -= z3; 1.1620 + z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ 1.1621 + tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ 1.1622 + tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ 1.1623 + 1.1624 + /* Final output stage */ 1.1625 + /* Samples are written in mirrored pairs (k, 11-k); each descaled value is masked with RANGE_MASK before it indexes range_limit[]. */ 1.1626 + outptr[0] = range_limit[(int) 
RIGHT_SHIFT(tmp20 + tmp10, 1.1627 + CONST_BITS+PASS1_BITS+3) 1.1628 + & RANGE_MASK]; 1.1629 + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 1.1630 + CONST_BITS+PASS1_BITS+3) 1.1631 + & RANGE_MASK]; 1.1632 + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 1.1633 + CONST_BITS+PASS1_BITS+3) 1.1634 + & RANGE_MASK]; 1.1635 + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 1.1636 + CONST_BITS+PASS1_BITS+3) 1.1637 + & RANGE_MASK]; 1.1638 + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 1.1639 + CONST_BITS+PASS1_BITS+3) 1.1640 + & RANGE_MASK]; 1.1641 + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 1.1642 + CONST_BITS+PASS1_BITS+3) 1.1643 + & RANGE_MASK]; 1.1644 + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 1.1645 + CONST_BITS+PASS1_BITS+3) 1.1646 + & RANGE_MASK]; 1.1647 + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 1.1648 + CONST_BITS+PASS1_BITS+3) 1.1649 + & RANGE_MASK]; 1.1650 + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 1.1651 + CONST_BITS+PASS1_BITS+3) 1.1652 + & RANGE_MASK]; 1.1653 + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 1.1654 + CONST_BITS+PASS1_BITS+3) 1.1655 + & RANGE_MASK]; 1.1656 + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, 1.1657 + CONST_BITS+PASS1_BITS+3) 1.1658 + & RANGE_MASK]; 1.1659 + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, 1.1660 + CONST_BITS+PASS1_BITS+3) 1.1661 + & RANGE_MASK]; 1.1662 + 1.1663 + wsptr += 8; /* advance pointer to next row */ 1.1664 + } 1.1665 +} 1.1666 + 1.1667 + 1.1668 +/* 1.1669 + * Perform dequantization and inverse DCT on one block of coefficients, 1.1670 + * producing a 13x13 output block. 1.1671 + * 1.1672 + * Optimized algorithm with 29 multiplications in the 1-D kernel. 1.1673 + * cK represents sqrt(2) * cos(K*pi/26). 
1.1674 + */ 1.1675 + 1.1676 +GLOBAL(void) 1.1677 +jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.1678 + JCOEFPTR coef_block, 1.1679 + JSAMPARRAY output_buf, JDIMENSION output_col) 1.1680 +{ 1.1681 + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; 1.1682 + INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; 1.1683 + INT32 z1, z2, z3, z4; 1.1684 + JCOEFPTR inptr; 1.1685 + ISLOW_MULT_TYPE * quantptr; 1.1686 + int * wsptr; 1.1687 + JSAMPROW outptr; 1.1688 + JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1.1689 + int ctr; 1.1690 + int workspace[8*13]; /* buffers data between passes */ 1.1691 + SHIFT_TEMPS 1.1692 + 1.1693 + /* Pass 1: process columns from input, store into work array. */ 1.1694 + 1.1695 + inptr = coef_block; 1.1696 + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1.1697 + wsptr = workspace; 1.1698 + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 1.1699 + /* Even part */ 1.1700 + 1.1701 + z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1.1702 + z1 <<= CONST_BITS; 1.1703 + /* Add fudge factor here for final descale. 
*/ 1.1704 + z1 += ONE << (CONST_BITS-PASS1_BITS-1); 1.1705 + 1.1706 + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1.1707 + z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1.1708 + z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1.1709 + 1.1710 + tmp10 = z3 + z4; 1.1711 + tmp11 = z3 - z4; 1.1712 + 1.1713 + tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */ 1.1714 + tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */ 1.1715 + 1.1716 + tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */ 1.1717 + tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */ 1.1718 + 1.1719 + tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */ 1.1720 + tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ 1.1721 + 1.1722 + tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ 1.1723 + tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ 1.1724 + 1.1725 + tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ 1.1726 + tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ 1.1727 + 1.1728 + tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ 1.1729 + tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ 1.1730 + 1.1731 + tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ 1.1732 + 1.1733 + /* Odd part */ 1.1734 + 1.1735 + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1.1736 + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1.1737 + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1.1738 + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1.1739 + 1.1740 + tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ 1.1741 + tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ 1.1742 + tmp15 = z1 + z4; 1.1743 + tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ 1.1744 + tmp10 = tmp11 + tmp12 + tmp13 - 1.1745 + MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ 1.1746 + tmp14 = MULTIPLY(z2 + z3, - 
FIX(0.338443458)); /* -c11 */ 1.1747 + tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ 1.1748 + tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ 1.1749 + tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ 1.1750 + tmp11 += tmp14; 1.1751 + tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ 1.1752 + tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ 1.1753 + tmp12 += tmp14; 1.1754 + tmp13 += tmp14; 1.1755 + tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ 1.1756 + tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */ 1.1757 + MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */ 1.1758 + z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */ 1.1759 + tmp14 += z1; 1.1760 + tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */ 1.1761 + MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */ 1.1762 + 1.1763 + /* Final output stage */ 1.1764 + 1.1765 + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 1.1766 + wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 1.1767 + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 1.1768 + wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 1.1769 + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 1.1770 + wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 1.1771 + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 1.1772 + wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 1.1773 + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 1.1774 + wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 1.1775 + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); 1.1776 + wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); 1.1777 + wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS); 1.1778 + } 1.1779 + 1.1780 + /* Pass 2: process 13 rows from work 
array, store into output array. */ 1.1781 + 1.1782 + wsptr = workspace; 1.1783 + for (ctr = 0; ctr < 13; ctr++) { 1.1784 + outptr = output_buf[ctr] + output_col; 1.1785 + 1.1786 + /* Even part */ 1.1787 + 1.1788 + /* Add fudge factor here for final descale. */ 1.1789 + z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 1.1790 + z1 <<= CONST_BITS; 1.1791 + 1.1792 + z2 = (INT32) wsptr[2]; 1.1793 + z3 = (INT32) wsptr[4]; 1.1794 + z4 = (INT32) wsptr[6]; 1.1795 + 1.1796 + tmp10 = z3 + z4; 1.1797 + tmp11 = z3 - z4; 1.1798 + 1.1799 + tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */ 1.1800 + tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */ 1.1801 + 1.1802 + tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */ 1.1803 + tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */ 1.1804 + 1.1805 + tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */ 1.1806 + tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ 1.1807 + 1.1808 + tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ 1.1809 + tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ 1.1810 + 1.1811 + tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ 1.1812 + tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ 1.1813 + 1.1814 + tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ 1.1815 + tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ 1.1816 + 1.1817 + tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ 1.1818 + 1.1819 + /* Odd part */ 1.1820 + 1.1821 + z1 = (INT32) wsptr[1]; 1.1822 + z2 = (INT32) wsptr[3]; 1.1823 + z3 = (INT32) wsptr[5]; 1.1824 + z4 = (INT32) wsptr[7]; 1.1825 + 1.1826 + tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ 1.1827 + tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ 1.1828 + tmp15 = z1 + z4; 1.1829 + tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ 1.1830 + tmp10 = tmp11 + tmp12 + tmp13 - 1.1831 + MULTIPLY(z1, FIX(2.020082300)); 
/* c7+c5+c3-c1 */ 1.1832 + tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ 1.1833 + tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ 1.1834 + tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ 1.1835 + tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ 1.1836 + tmp11 += tmp14; 1.1837 + tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ 1.1838 + tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ 1.1839 + tmp12 += tmp14; 1.1840 + tmp13 += tmp14; 1.1841 + tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ 1.1842 + tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */ 1.1843 + MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */ 1.1844 + z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */ 1.1845 + tmp14 += z1; 1.1846 + tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */ 1.1847 + MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */ 1.1848 + 1.1849 + /* Final output stage */ 1.1850 + 1.1851 + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 1.1852 + CONST_BITS+PASS1_BITS+3) 1.1853 + & RANGE_MASK]; 1.1854 + outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 1.1855 + CONST_BITS+PASS1_BITS+3) 1.1856 + & RANGE_MASK]; 1.1857 + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 1.1858 + CONST_BITS+PASS1_BITS+3) 1.1859 + & RANGE_MASK]; 1.1860 + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 1.1861 + CONST_BITS+PASS1_BITS+3) 1.1862 + & RANGE_MASK]; 1.1863 + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 1.1864 + CONST_BITS+PASS1_BITS+3) 1.1865 + & RANGE_MASK]; 1.1866 + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 1.1867 + CONST_BITS+PASS1_BITS+3) 1.1868 + & RANGE_MASK]; 1.1869 + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 1.1870 + CONST_BITS+PASS1_BITS+3) 1.1871 + & RANGE_MASK]; 1.1872 + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 1.1873 + CONST_BITS+PASS1_BITS+3) 1.1874 + & RANGE_MASK]; 1.1875 + outptr[4] = range_limit[(int) 
RIGHT_SHIFT(tmp24 + tmp14, 1.1876 + CONST_BITS+PASS1_BITS+3) 1.1877 + & RANGE_MASK]; 1.1878 + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 1.1879 + CONST_BITS+PASS1_BITS+3) 1.1880 + & RANGE_MASK]; 1.1881 + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, 1.1882 + CONST_BITS+PASS1_BITS+3) 1.1883 + & RANGE_MASK]; 1.1884 + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, 1.1885 + CONST_BITS+PASS1_BITS+3) 1.1886 + & RANGE_MASK]; 1.1887 + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26, 1.1888 + CONST_BITS+PASS1_BITS+3) 1.1889 + & RANGE_MASK]; 1.1890 + 1.1891 + wsptr += 8; /* advance pointer to next row */ 1.1892 + } 1.1893 +} 1.1894 + 1.1895 + 1.1896 +/* 1.1897 + * Perform dequantization and inverse DCT on one block of coefficients, 1.1898 + * producing a 14x14 output block. 1.1899 + * 1.1900 + * Optimized algorithm with 20 multiplications in the 1-D kernel. 1.1901 + * cK represents sqrt(2) * cos(K*pi/28). 1.1902 + */ 1.1903 + 1.1904 +GLOBAL(void) 1.1905 +jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.1906 + JCOEFPTR coef_block, 1.1907 + JSAMPARRAY output_buf, JDIMENSION output_col) 1.1908 +{ 1.1909 + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; 1.1910 + INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; 1.1911 + INT32 z1, z2, z3, z4; 1.1912 + JCOEFPTR inptr; 1.1913 + ISLOW_MULT_TYPE * quantptr; 1.1914 + int * wsptr; 1.1915 + JSAMPROW outptr; 1.1916 + JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1.1917 + int ctr; 1.1918 + int workspace[8*14]; /* buffers data between passes */ 1.1919 + SHIFT_TEMPS 1.1920 + 1.1921 + /* Pass 1: process columns from input, store into work array. 
*/ 1.1922 + 1.1923 + inptr = coef_block; 1.1924 + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1.1925 + wsptr = workspace; 1.1926 + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 1.1927 + /* Even part */ 1.1928 + 1.1929 + z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1.1930 + z1 <<= CONST_BITS; 1.1931 + /* Add fudge factor here for final descale. */ 1.1932 + z1 += ONE << (CONST_BITS-PASS1_BITS-1); 1.1933 + z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1.1934 + z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ 1.1935 + z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ 1.1936 + z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ 1.1937 + 1.1938 + tmp10 = z1 + z2; 1.1939 + tmp11 = z1 + z3; 1.1940 + tmp12 = z1 - z4; 1.1941 + 1.1942 + tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */ 1.1943 + CONST_BITS-PASS1_BITS); 1.1944 + 1.1945 + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1.1946 + z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1.1947 + 1.1948 + z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ 1.1949 + 1.1950 + tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ 1.1951 + tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ 1.1952 + tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ 1.1953 + MULTIPLY(z2, FIX(1.378756276)); /* c2 */ 1.1954 + 1.1955 + tmp20 = tmp10 + tmp13; 1.1956 + tmp26 = tmp10 - tmp13; 1.1957 + tmp21 = tmp11 + tmp14; 1.1958 + tmp25 = tmp11 - tmp14; 1.1959 + tmp22 = tmp12 + tmp15; 1.1960 + tmp24 = tmp12 - tmp15; 1.1961 + 1.1962 + /* Odd part */ 1.1963 + 1.1964 + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1.1965 + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1.1966 + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1.1967 + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1.1968 + tmp13 = z4 << CONST_BITS; 1.1969 + 1.1970 + tmp14 = z1 + z3; 1.1971 + tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ 1.1972 + tmp12 = 
MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ 1.1973 + tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ 1.1974 + tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ 1.1975 + tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ 1.1976 + z1 -= z2; 1.1977 + tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */ 1.1978 + tmp16 += tmp15; 1.1979 + z1 += z4; 1.1980 + z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */ 1.1981 + tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ 1.1982 + tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ 1.1983 + z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ 1.1984 + tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ 1.1985 + tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ 1.1986 + 1.1987 + tmp13 = (z1 - z3) << PASS1_BITS; 1.1988 + 1.1989 + /* Final output stage */ 1.1990 + 1.1991 + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 1.1992 + wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 1.1993 + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 1.1994 + wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 1.1995 + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 1.1996 + wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 1.1997 + wsptr[8*3] = (int) (tmp23 + tmp13); 1.1998 + wsptr[8*10] = (int) (tmp23 - tmp13); 1.1999 + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 1.2000 + wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 1.2001 + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); 1.2002 + wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); 1.2003 + wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); 1.2004 + wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); 1.2005 + } 1.2006 + 1.2007 + /* 
Pass 2: process 14 rows from work array, store into output array. */ 1.2008 + 1.2009 + wsptr = workspace; 1.2010 + for (ctr = 0; ctr < 14; ctr++) { 1.2011 + outptr = output_buf[ctr] + output_col; 1.2012 + 1.2013 + /* Even part */ 1.2014 + 1.2015 + /* Add fudge factor here for final descale. */ 1.2016 + z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 1.2017 + z1 <<= CONST_BITS; 1.2018 + z4 = (INT32) wsptr[4]; 1.2019 + z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ 1.2020 + z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ 1.2021 + z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ 1.2022 + 1.2023 + tmp10 = z1 + z2; 1.2024 + tmp11 = z1 + z3; 1.2025 + tmp12 = z1 - z4; 1.2026 + 1.2027 + tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */ 1.2028 + 1.2029 + z1 = (INT32) wsptr[2]; 1.2030 + z2 = (INT32) wsptr[6]; 1.2031 + 1.2032 + z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ 1.2033 + 1.2034 + tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ 1.2035 + tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ 1.2036 + tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ 1.2037 + MULTIPLY(z2, FIX(1.378756276)); /* c2 */ 1.2038 + 1.2039 + tmp20 = tmp10 + tmp13; 1.2040 + tmp26 = tmp10 - tmp13; 1.2041 + tmp21 = tmp11 + tmp14; 1.2042 + tmp25 = tmp11 - tmp14; 1.2043 + tmp22 = tmp12 + tmp15; 1.2044 + tmp24 = tmp12 - tmp15; 1.2045 + 1.2046 + /* Odd part */ 1.2047 + 1.2048 + z1 = (INT32) wsptr[1]; 1.2049 + z2 = (INT32) wsptr[3]; 1.2050 + z3 = (INT32) wsptr[5]; 1.2051 + z4 = (INT32) wsptr[7]; 1.2052 + z4 <<= CONST_BITS; 1.2053 + 1.2054 + tmp14 = z1 + z3; 1.2055 + tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ 1.2056 + tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ 1.2057 + tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ 1.2058 + tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ 1.2059 + tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ 1.2060 + z1 -= z2; 1.2061 + tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */ 
1.2062 + tmp16 += tmp15; 1.2063 + tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */ 1.2064 + tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ 1.2065 + tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ 1.2066 + tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ 1.2067 + tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ 1.2068 + tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ 1.2069 + 1.2070 + tmp13 = ((z1 - z3) << CONST_BITS) + z4; 1.2071 + 1.2072 + /* Final output stage */ 1.2073 + 1.2074 + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 1.2075 + CONST_BITS+PASS1_BITS+3) 1.2076 + & RANGE_MASK]; 1.2077 + outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 1.2078 + CONST_BITS+PASS1_BITS+3) 1.2079 + & RANGE_MASK]; 1.2080 + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 1.2081 + CONST_BITS+PASS1_BITS+3) 1.2082 + & RANGE_MASK]; 1.2083 + outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 1.2084 + CONST_BITS+PASS1_BITS+3) 1.2085 + & RANGE_MASK]; 1.2086 + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 1.2087 + CONST_BITS+PASS1_BITS+3) 1.2088 + & RANGE_MASK]; 1.2089 + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 1.2090 + CONST_BITS+PASS1_BITS+3) 1.2091 + & RANGE_MASK]; 1.2092 + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 1.2093 + CONST_BITS+PASS1_BITS+3) 1.2094 + & RANGE_MASK]; 1.2095 + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 1.2096 + CONST_BITS+PASS1_BITS+3) 1.2097 + & RANGE_MASK]; 1.2098 + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 1.2099 + CONST_BITS+PASS1_BITS+3) 1.2100 + & RANGE_MASK]; 1.2101 + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 1.2102 + CONST_BITS+PASS1_BITS+3) 1.2103 + & RANGE_MASK]; 1.2104 + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, 1.2105 + CONST_BITS+PASS1_BITS+3) 1.2106 + & RANGE_MASK]; 1.2107 + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - 
tmp15, 1.2108 + CONST_BITS+PASS1_BITS+3) 1.2109 + & RANGE_MASK]; 1.2110 + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, 1.2111 + CONST_BITS+PASS1_BITS+3) 1.2112 + & RANGE_MASK]; 1.2113 + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, 1.2114 + CONST_BITS+PASS1_BITS+3) 1.2115 + & RANGE_MASK]; 1.2116 + 1.2117 + wsptr += 8; /* advance pointer to next row */ 1.2118 + } 1.2119 +} 1.2120 + 1.2121 + 1.2122 +/* 1.2123 + * Perform dequantization and inverse DCT on one block of coefficients, 1.2124 + * producing a 15x15 output block. 1.2125 + * 1.2126 + * Optimized algorithm with 22 multiplications in the 1-D kernel. 1.2127 + * cK represents sqrt(2) * cos(K*pi/30). 1.2128 + */ 1.2129 + 1.2130 +GLOBAL(void) 1.2131 +jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.2132 + JCOEFPTR coef_block, 1.2133 + JSAMPARRAY output_buf, JDIMENSION output_col) 1.2134 +{ 1.2135 + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; 1.2136 + INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; 1.2137 + INT32 z1, z2, z3, z4; 1.2138 + JCOEFPTR inptr; 1.2139 + ISLOW_MULT_TYPE * quantptr; 1.2140 + int * wsptr; 1.2141 + JSAMPROW outptr; 1.2142 + JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1.2143 + int ctr; 1.2144 + int workspace[8*15]; /* buffers data between passes */ 1.2145 + SHIFT_TEMPS 1.2146 + 1.2147 + /* Pass 1: process columns from input, store into work array. */ 1.2148 + 1.2149 + inptr = coef_block; 1.2150 + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1.2151 + wsptr = workspace; 1.2152 + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 1.2153 + /* Even part */ 1.2154 + 1.2155 + z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1.2156 + z1 <<= CONST_BITS; 1.2157 + /* Add fudge factor here for final descale. 
*/ 1.2158 + z1 += ONE << (CONST_BITS-PASS1_BITS-1); 1.2159 + 1.2160 + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1.2161 + z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1.2162 + z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1.2163 + 1.2164 + tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ 1.2165 + tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ 1.2166 + 1.2167 + tmp12 = z1 - tmp10; 1.2168 + tmp13 = z1 + tmp11; 1.2169 + z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */ 1.2170 + 1.2171 + z4 = z2 - z3; 1.2172 + z3 += z2; 1.2173 + tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */ 1.2174 + tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */ 1.2175 + z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */ 1.2176 + 1.2177 + tmp20 = tmp13 + tmp10 + tmp11; 1.2178 + tmp23 = tmp12 - tmp10 + tmp11 + z2; 1.2179 + 1.2180 + tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */ 1.2181 + tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */ 1.2182 + 1.2183 + tmp25 = tmp13 - tmp10 - tmp11; 1.2184 + tmp26 = tmp12 + tmp10 - tmp11 - z2; 1.2185 + 1.2186 + tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */ 1.2187 + tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */ 1.2188 + 1.2189 + tmp21 = tmp12 + tmp10 + tmp11; 1.2190 + tmp24 = tmp13 - tmp10 + tmp11; 1.2191 + tmp11 += tmp11; 1.2192 + tmp22 = z1 + tmp11; /* c10 = c6-c12 */ 1.2193 + tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */ 1.2194 + 1.2195 + /* Odd part */ 1.2196 + 1.2197 + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1.2198 + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1.2199 + z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1.2200 + z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ 1.2201 + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1.2202 + 1.2203 + tmp13 = z2 - z4; 1.2204 + tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ 1.2205 + tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ 1.2206 + tmp14 = tmp15 - 
MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ 1.2207 + 1.2208 + tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ 1.2209 + tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ 1.2210 + z2 = z1 - z4; 1.2211 + tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ 1.2212 + 1.2213 + tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */ 1.2214 + tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */ 1.2215 + tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */ 1.2216 + z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */ 1.2217 + tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */ 1.2218 + tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */ 1.2219 + 1.2220 + /* Final output stage */ 1.2221 + 1.2222 + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 1.2223 + wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 1.2224 + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 1.2225 + wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 1.2226 + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 1.2227 + wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 1.2228 + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 1.2229 + wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 1.2230 + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 1.2231 + wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 1.2232 + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); 1.2233 + wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); 1.2234 + wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); 1.2235 + wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); 1.2236 + wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS); 1.2237 + } 1.2238 + 1.2239 + /* Pass 2: process 15 rows from work 
array, store into output array. */ 1.2240 + 1.2241 + wsptr = workspace; 1.2242 + for (ctr = 0; ctr < 15; ctr++) { 1.2243 + outptr = output_buf[ctr] + output_col; 1.2244 + 1.2245 + /* Even part */ 1.2246 + 1.2247 + /* Add fudge factor here for final descale. */ 1.2248 + z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 1.2249 + z1 <<= CONST_BITS; 1.2250 + 1.2251 + z2 = (INT32) wsptr[2]; 1.2252 + z3 = (INT32) wsptr[4]; 1.2253 + z4 = (INT32) wsptr[6]; 1.2254 + 1.2255 + tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ 1.2256 + tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ 1.2257 + 1.2258 + tmp12 = z1 - tmp10; 1.2259 + tmp13 = z1 + tmp11; 1.2260 + z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */ 1.2261 + 1.2262 + z4 = z2 - z3; 1.2263 + z3 += z2; 1.2264 + tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */ 1.2265 + tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */ 1.2266 + z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */ 1.2267 + 1.2268 + tmp20 = tmp13 + tmp10 + tmp11; 1.2269 + tmp23 = tmp12 - tmp10 + tmp11 + z2; 1.2270 + 1.2271 + tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */ 1.2272 + tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */ 1.2273 + 1.2274 + tmp25 = tmp13 - tmp10 - tmp11; 1.2275 + tmp26 = tmp12 + tmp10 - tmp11 - z2; 1.2276 + 1.2277 + tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */ 1.2278 + tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */ 1.2279 + 1.2280 + tmp21 = tmp12 + tmp10 + tmp11; 1.2281 + tmp24 = tmp13 - tmp10 + tmp11; 1.2282 + tmp11 += tmp11; 1.2283 + tmp22 = z1 + tmp11; /* c10 = c6-c12 */ 1.2284 + tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */ 1.2285 + 1.2286 + /* Odd part */ 1.2287 + 1.2288 + z1 = (INT32) wsptr[1]; 1.2289 + z2 = (INT32) wsptr[3]; 1.2290 + z4 = (INT32) wsptr[5]; 1.2291 + z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ 1.2292 + z4 = (INT32) wsptr[7]; 1.2293 + 1.2294 + tmp13 = z2 - z4; 1.2295 + tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ 1.2296 + tmp11 = tmp15 + MULTIPLY(z1, 
FIX(0.513743148)); /* c3-c9 */ 1.2297 + tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ 1.2298 + 1.2299 + tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ 1.2300 + tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ 1.2301 + z2 = z1 - z4; 1.2302 + tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ 1.2303 + 1.2304 + tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */ 1.2305 + tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */ 1.2306 + tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */ 1.2307 + z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */ 1.2308 + tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */ 1.2309 + tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */ 1.2310 + 1.2311 + /* Final output stage */ 1.2312 + 1.2313 + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 1.2314 + CONST_BITS+PASS1_BITS+3) 1.2315 + & RANGE_MASK]; 1.2316 + outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 1.2317 + CONST_BITS+PASS1_BITS+3) 1.2318 + & RANGE_MASK]; 1.2319 + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 1.2320 + CONST_BITS+PASS1_BITS+3) 1.2321 + & RANGE_MASK]; 1.2322 + outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 1.2323 + CONST_BITS+PASS1_BITS+3) 1.2324 + & RANGE_MASK]; 1.2325 + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 1.2326 + CONST_BITS+PASS1_BITS+3) 1.2327 + & RANGE_MASK]; 1.2328 + outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 1.2329 + CONST_BITS+PASS1_BITS+3) 1.2330 + & RANGE_MASK]; 1.2331 + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 1.2332 + CONST_BITS+PASS1_BITS+3) 1.2333 + & RANGE_MASK]; 1.2334 + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 1.2335 + CONST_BITS+PASS1_BITS+3) 1.2336 + & RANGE_MASK]; 1.2337 + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 1.2338 + CONST_BITS+PASS1_BITS+3) 1.2339 + & RANGE_MASK]; 1.2340 + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 
1.2341 + CONST_BITS+PASS1_BITS+3) 1.2342 + & RANGE_MASK]; 1.2343 + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, 1.2344 + CONST_BITS+PASS1_BITS+3) 1.2345 + & RANGE_MASK]; 1.2346 + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, 1.2347 + CONST_BITS+PASS1_BITS+3) 1.2348 + & RANGE_MASK]; 1.2349 + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, 1.2350 + CONST_BITS+PASS1_BITS+3) 1.2351 + & RANGE_MASK]; 1.2352 + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, 1.2353 + CONST_BITS+PASS1_BITS+3) 1.2354 + & RANGE_MASK]; 1.2355 + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27, 1.2356 + CONST_BITS+PASS1_BITS+3) 1.2357 + & RANGE_MASK]; 1.2358 + 1.2359 + wsptr += 8; /* advance pointer to next row */ 1.2360 + } 1.2361 +} 1.2362 + 1.2363 + 1.2364 +/* 1.2365 + * Perform dequantization and inverse DCT on one block of coefficients, 1.2366 + * producing a 16x16 output block. 1.2367 + * 1.2368 + * Optimized algorithm with 28 multiplications in the 1-D kernel. 1.2369 + * cK represents sqrt(2) * cos(K*pi/32). 1.2370 + * * cinfo: only used to fetch the sample range-limit table through IDCT_range_limit(). * compptr: its dct_table supplies the multipliers handed to DEQUANTIZE. * coef_block: input coefficients; entries inptr[DCTSIZE*r] for r = 0..7 are read in each of 8 columns. * output_buf, output_col: row ctr of the result is stored starting at output_buf[ctr] + output_col, for ctr = 0..15, 16 samples per row. * * Pass 1 runs the 1-D kernel down each of the 8 input columns and leaves 16 values per column in the local workspace, descaled by CONST_BITS-PASS1_BITS. Pass 2 runs the same kernel along each of the 16 workspace rows and shifts right by CONST_BITS+PASS1_BITS+3 before the range_limit lookup. */ 1.2371 + 1.2372 +GLOBAL(void) 1.2373 +jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1.2374 + JCOEFPTR coef_block, 1.2375 + JSAMPARRAY output_buf, JDIMENSION output_col) 1.2376 +{ 1.2377 + INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; 1.2378 + INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; 1.2379 + INT32 z1, z2, z3, z4; 1.2380 + JCOEFPTR inptr; 1.2381 + ISLOW_MULT_TYPE * quantptr; 1.2382 + int * wsptr; 1.2383 + JSAMPROW outptr; 1.2384 + JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1.2385 + int ctr; 1.2386 + int workspace[8*16]; /* buffers data between passes: 16 rows of 8 ints; pass 1 fills it column by column (wsptr[8*k]), pass 2 consumes it row by row */ 1.2387 + SHIFT_TEMPS 1.2388 + 1.2389 + /* Pass 1: process columns from input, store into work array. Each iteration handles one column: inptr, quantptr and wsptr all step by one element. 
*/ 1.2390 + 1.2391 + inptr = coef_block; 1.2392 + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1.2393 + wsptr = workspace; 1.2394 + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 1.2395 + /* Even part: built from coefficient rows 0, 4, 2 and 6 of the current column */ 1.2396 + 1.2397 + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1.2398 + tmp0 <<= CONST_BITS; 1.2399 + /* Add fudge factor here for final descale. The constant is half of 1 << (CONST_BITS-PASS1_BITS), the amount shifted out in the output stage of this pass. NOTE(review): the statement just before this left-shifts a signed value; if the dequantized term can be negative that is undefined in ISO C - confirm it is acceptable for the supported compilers. */ 1.2400 + tmp0 += 1 << (CONST_BITS-PASS1_BITS-1); 1.2401 + 1.2402 + z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1.2403 + tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ 1.2404 + tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ 1.2405 + 1.2406 + tmp10 = tmp0 + tmp1; 1.2407 + tmp11 = tmp0 - tmp1; 1.2408 + tmp12 = tmp0 + tmp2; 1.2409 + tmp13 = tmp0 - tmp2; 1.2410 + 1.2411 + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1.2412 + z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1.2413 + z3 = z1 - z2; 1.2414 + z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ 1.2415 + z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ 1.2416 + 1.2417 + tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ 1.2418 + tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ 1.2419 + tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ 1.2420 + tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ 1.2421 + 1.2422 + tmp20 = tmp10 + tmp0; 1.2423 + tmp27 = tmp10 - tmp0; 1.2424 + tmp21 = tmp12 + tmp1; 1.2425 + tmp26 = tmp12 - tmp1; 1.2426 + tmp22 = tmp13 + tmp2; 1.2427 + tmp25 = tmp13 - tmp2; 1.2428 + tmp23 = tmp11 + tmp3; 1.2429 + tmp24 = tmp11 - tmp3; 1.2430 + 1.2431 + /* Odd part: built from coefficient rows 1, 3, 5 and 7; tmp0..tmp3 and tmp10..tmp13 are reused as the eight odd-part sums now that tmp20..tmp27 hold the even part */ 1.2432 + 1.2433 + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1.2434 + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1.2435 + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1.2436 + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1.2437 + 1.2438 + 
tmp11 = z1 + z3; 1.2439 + 1.2440 + tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ 1.2441 + tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ 1.2442 + tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ 1.2443 + tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ 1.2444 + tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ 1.2445 + tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ 1.2446 + tmp0 = tmp1 + tmp2 + tmp3 - 1.2447 + MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ 1.2448 + tmp13 = tmp10 + tmp11 + tmp12 - 1.2449 + MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ 1.2450 + z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ 1.2451 + tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ 1.2452 + tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ 1.2453 + z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ 1.2454 + tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ 1.2455 + tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ 1.2456 + z2 += z4; /* z2 now holds z2+z4; the original z2 is not needed again */ 1.2457 + z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ 1.2458 + tmp1 += z1; 1.2459 + tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ 1.2460 + z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ 1.2461 + tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ 1.2462 + tmp12 += z2; 1.2463 + z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ 1.2464 + tmp2 += z2; 1.2465 + tmp3 += z2; 1.2466 + z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ 1.2467 + tmp10 += z2; 1.2468 + tmp11 += z2; 1.2469 + 1.2470 + /* Final output stage: workspace rows k and 15-k of this column receive even+odd and even-odd respectively, descaled by CONST_BITS-PASS1_BITS */ 1.2471 + 1.2472 + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS); 1.2473 + wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS); 1.2474 + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS); 1.2475 + wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS); 1.2476 + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS); 
1.2477 + wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS); 1.2478 + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS); 1.2479 + wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS); 1.2480 + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS); 1.2481 + wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS); 1.2482 + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS); 1.2483 + wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS); 1.2484 + wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS); 1.2485 + wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS); 1.2486 + wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS); 1.2487 + wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS); 1.2488 + } 1.2489 + 1.2490 + /* Pass 2: process 16 rows from work array, store into output array. Row ctr goes to output_buf[ctr] starting at sample output_col. */ 1.2491 + 1.2492 + wsptr = workspace; 1.2493 + for (ctr = 0; ctr < 16; ctr++) { 1.2494 + outptr = output_buf[ctr] + output_col; 1.2495 + 1.2496 + /* Even part: built from entries 0, 4, 2 and 6 of the current workspace row */ 1.2497 + 1.2498 + /* Add fudge factor here for final descale. ONE << (PASS1_BITS+2) is added before the shift by CONST_BITS, so it ends up as half of 1 << (CONST_BITS+PASS1_BITS+3), the amount shifted out in the final output stage. NOTE(review): the shift that follows is again a left shift of a signed value, which is undefined in ISO C if the value can be negative - confirm. 
*/ 1.2499 + tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 1.2500 + tmp0 <<= CONST_BITS; 1.2501 + 1.2502 + z1 = (INT32) wsptr[4]; 1.2503 + tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ 1.2504 + tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ 1.2505 + 1.2506 + tmp10 = tmp0 + tmp1; 1.2507 + tmp11 = tmp0 - tmp1; 1.2508 + tmp12 = tmp0 + tmp2; 1.2509 + tmp13 = tmp0 - tmp2; 1.2510 + 1.2511 + z1 = (INT32) wsptr[2]; 1.2512 + z2 = (INT32) wsptr[6]; 1.2513 + z3 = z1 - z2; 1.2514 + z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ 1.2515 + z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ 1.2516 + 1.2517 + tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ 1.2518 + tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ 1.2519 + tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ 1.2520 + tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ 1.2521 + 1.2522 + tmp20 = tmp10 + tmp0; 1.2523 + tmp27 = tmp10 - tmp0; 1.2524 + tmp21 = tmp12 + tmp1; 1.2525 + tmp26 = tmp12 - tmp1; 1.2526 + tmp22 = tmp13 + tmp2; 1.2527 + tmp25 = tmp13 - tmp2; 1.2528 + tmp23 = tmp11 + tmp3; 1.2529 + tmp24 = tmp11 - tmp3; 1.2530 + 1.2531 + /* Odd part: built from entries 1, 3, 5 and 7 of the current workspace row; the arithmetic is the same as in the odd part of pass 1 */ 1.2532 + 1.2533 + z1 = (INT32) wsptr[1]; 1.2534 + z2 = (INT32) wsptr[3]; 1.2535 + z3 = (INT32) wsptr[5]; 1.2536 + z4 = (INT32) wsptr[7]; 1.2537 + 1.2538 + tmp11 = z1 + z3; 1.2539 + 1.2540 + tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ 1.2541 + tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ 1.2542 + tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ 1.2543 + tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ 1.2544 + tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ 1.2545 + tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ 1.2546 + tmp0 = tmp1 + tmp2 + tmp3 - 1.2547 + MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ 1.2548 + tmp13 = tmp10 + tmp11 + tmp12 - 1.2549 + MULTIPLY(z1, 
FIX(1.835730603)); /* c9+c11+c13-c15 */ 1.2550 + z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ 1.2551 + tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ 1.2552 + tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ 1.2553 + z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ 1.2554 + tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ 1.2555 + tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ 1.2556 + z2 += z4; /* z2 now holds z2+z4; the original z2 is not needed again */ 1.2557 + z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ 1.2558 + tmp1 += z1; 1.2559 + tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ 1.2560 + z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ 1.2561 + tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ 1.2562 + tmp12 += z2; 1.2563 + z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ 1.2564 + tmp2 += z2; 1.2565 + tmp3 += z2; 1.2566 + z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ 1.2567 + tmp10 += z2; 1.2568 + tmp11 += z2; 1.2569 + 1.2570 + /* Final output stage: samples k and 15-k of this row receive even+odd and even-odd; each value is shifted right by CONST_BITS+PASS1_BITS+3, masked with RANGE_MASK and looked up in range_limit */ 1.2571 + 1.2572 + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0, 1.2573 + CONST_BITS+PASS1_BITS+3) 1.2574 + & RANGE_MASK]; 1.2575 + outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0, 1.2576 + CONST_BITS+PASS1_BITS+3) 1.2577 + & RANGE_MASK]; 1.2578 + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1, 1.2579 + CONST_BITS+PASS1_BITS+3) 1.2580 + & RANGE_MASK]; 1.2581 + outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1, 1.2582 + CONST_BITS+PASS1_BITS+3) 1.2583 + & RANGE_MASK]; 1.2584 + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2, 1.2585 + CONST_BITS+PASS1_BITS+3) 1.2586 + & RANGE_MASK]; 1.2587 + outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2, 1.2588 + CONST_BITS+PASS1_BITS+3) 1.2589 + & RANGE_MASK]; 1.2590 + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3, 1.2591 + CONST_BITS+PASS1_BITS+3) 1.2592 + & RANGE_MASK]; 1.2593 + outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3, 1.2594 + 
CONST_BITS+PASS1_BITS+3) 1.2595 + & RANGE_MASK]; 1.2596 + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10, 1.2597 + CONST_BITS+PASS1_BITS+3) 1.2598 + & RANGE_MASK]; 1.2599 + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10, 1.2600 + CONST_BITS+PASS1_BITS+3) 1.2601 + & RANGE_MASK]; 1.2602 + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11, 1.2603 + CONST_BITS+PASS1_BITS+3) 1.2604 + & RANGE_MASK]; 1.2605 + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11, 1.2606 + CONST_BITS+PASS1_BITS+3) 1.2607 + & RANGE_MASK]; 1.2608 + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12, 1.2609 + CONST_BITS+PASS1_BITS+3) 1.2610 + & RANGE_MASK]; 1.2611 + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12, 1.2612 + CONST_BITS+PASS1_BITS+3) 1.2613 + & RANGE_MASK]; 1.2614 + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13, 1.2615 + CONST_BITS+PASS1_BITS+3) 1.2616 + & RANGE_MASK]; 1.2617 + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13, 1.2618 + CONST_BITS+PASS1_BITS+3) 1.2619 + & RANGE_MASK]; 1.2620 + 1.2621 + wsptr += 8; /* advance pointer to next row (8 ints per workspace row) */ 1.2622 + } 1.2623 +} 1.2624 + 1.2625 +#endif /* IDCT_SCALING_SUPPORTED */ 1.2626 +#endif /* DCT_ISLOW_SUPPORTED */