The Tor Browser: media/libjpeg/jidctfst.c@6474c204b198

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*

     2  * jidctfst.c

     3  *

     4  * Copyright (C) 1994-1998, Thomas G. Lane.

     5  * This file is part of the Independent JPEG Group's software.

     6  * For conditions of distribution and use, see the accompanying README file.

     7  *

     8  * This file contains a fast, not so accurate integer implementation of the

     9  * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine

    10  * must also perform dequantization of the input coefficients.

    11  *

    12  * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT

    13  * on each row (or vice versa, but it's more convenient to emit a row at

    14  * a time).  Direct algorithms are also available, but they are much more

    15  * complex and seem not to be any faster when reduced to code.

    16  *

    17  * This implementation is based on Arai, Agui, and Nakajima's algorithm for

    18  * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in

    19  * Japanese, but the algorithm is described in the Pennebaker & Mitchell

    20  * JPEG textbook (see REFERENCES section in file README).  The following code

    21  * is based directly on figure 4-8 in P&M.

    22  * While an 8-point DCT cannot be done in less than 11 multiplies, it is

    23  * possible to arrange the computation so that many of the multiplies are

    24  * simple scalings of the final outputs.  These multiplies can then be

    25  * folded into the multiplications or divisions by the JPEG quantization

    26  * table entries.  The AA&N method leaves only 5 multiplies and 29 adds

    27  * to be done in the DCT itself.

    28  * The primary disadvantage of this method is that with fixed-point math,

    29  * accuracy is lost due to imprecise representation of the scaled

    30  * quantization values.  The smaller the quantization table entry, the less

    31  * precise the scaled value, so this implementation does worse with high-

    32  * quality-setting files than with low-quality ones.

    33  */

    35 #define JPEG_INTERNALS

    36 #include "jinclude.h"

    37 #include "jpeglib.h"

    38 #include "jdct.h"		/* Private declarations for DCT subsystem */

    40 #ifdef DCT_IFAST_SUPPORTED

    43 /*

    44  * This module is specialized to the case DCTSIZE = 8.

    45  */

    47 #if DCTSIZE != 8

    48   Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */

    49 #endif

    52 /* Scaling decisions are generally the same as in the LL&M algorithm;

    53  * see jidctint.c for more details.  However, we choose to descale

    54  * (right shift) multiplication products as soon as they are formed,

    55  * rather than carrying additional fractional bits into subsequent additions.

    56  * This compromises accuracy slightly, but it lets us save a few shifts.

    57  * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)

    58  * everywhere except in the multiplications proper; this saves a good deal

    59  * of work on 16-bit-int machines.

    60  *

    61  * The dequantized coefficients are not integers because the AA&N scaling

    62  * factors have been incorporated.  We represent them scaled up by PASS1_BITS,

    63  * so that the first and second IDCT rounds have the same input scaling.

    64  * For 8-bit JSAMPLEs, we choose IFAST_SCALE_BITS = PASS1_BITS so as to

    65  * avoid a descaling shift; this compromises accuracy rather drastically

    66  * for small quantization table entries, but it saves a lot of shifts.

    67  * For 12-bit JSAMPLEs, there's no hope of using 16x16 multiplies anyway,

    68  * so we use a much larger scaling factor to preserve accuracy.

    69  *

    70  * A final compromise is to represent the multiplicative constants to only

    71  * 8 fractional bits, rather than 13.  This saves some shifting work on some

    72  * machines, and may also reduce the cost of multiplication (since there

    73  * are fewer one-bits in the constants).

    74  */

    76 #if BITS_IN_JSAMPLE == 8

    77 #define CONST_BITS  8

    78 #define PASS1_BITS  2

    79 #else

    80 #define CONST_BITS  8

    81 #define PASS1_BITS  1		/* lose a little precision to avoid overflow */

    82 #endif

    84 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus

    85  * causing a lot of useless floating-point operations at run time.

    86  * To get around this we use the following pre-calculated constants.

    87  * If you change CONST_BITS you may want to add appropriate values.

    88  * (With a reasonable C compiler, you can just rely on the FIX() macro...)

    89  */

    91 #if CONST_BITS == 8

    92 #define FIX_1_082392200  ((INT32)  277)		/* FIX(1.082392200) */

    93 #define FIX_1_414213562  ((INT32)  362)		/* FIX(1.414213562) */

    94 #define FIX_1_847759065  ((INT32)  473)		/* FIX(1.847759065) */

    95 #define FIX_2_613125930  ((INT32)  669)		/* FIX(2.613125930) */

    96 #else

    97 #define FIX_1_082392200  FIX(1.082392200)

    98 #define FIX_1_414213562  FIX(1.414213562)

    99 #define FIX_1_847759065  FIX(1.847759065)

   100 #define FIX_2_613125930  FIX(2.613125930)

   101 #endif

   104 /* We can gain a little more speed, with a further compromise in accuracy,

   105  * by omitting the addition in a descaling shift.  This yields an incorrectly

   106  * rounded result half the time...

   107  */

   109 #ifndef USE_ACCURATE_ROUNDING

   110 #undef DESCALE

   111 #define DESCALE(x,n)  RIGHT_SHIFT(x, n)

   112 #endif

   115 /* Multiply a DCTELEM variable by an INT32 constant, and immediately

   116  * descale to yield a DCTELEM result.

   117  */

   119 #define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))

   122 /* Dequantize a coefficient by multiplying it by the multiplier-table

   123  * entry; produce a DCTELEM result.  For 8-bit data a 16x16->16

   124  * multiplication will do.  For 12-bit data, the multiplier table is

   125  * declared INT32, so a 32-bit multiply will be used.

   126  */

   128 #if BITS_IN_JSAMPLE == 8

   129 #define DEQUANTIZE(coef,quantval)  (((IFAST_MULT_TYPE) (coef)) * (quantval))

   130 #else

   131 #define DEQUANTIZE(coef,quantval)  \

   132 	DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS)

   133 #endif

   136 /* Like DESCALE, but applies to a DCTELEM and produces an int.

   137  * We assume that int right shift is unsigned if INT32 right shift is.

   138  */

   140 #ifdef RIGHT_SHIFT_IS_UNSIGNED

   141 #define ISHIFT_TEMPS	DCTELEM ishift_temp;

   142 #if BITS_IN_JSAMPLE == 8

   143 #define DCTELEMBITS  16		/* DCTELEM may be 16 or 32 bits */

   144 #else

   145 #define DCTELEMBITS  32		/* DCTELEM must be 32 bits */

   146 #endif

   147 #define IRIGHT_SHIFT(x,shft)  \

   148     ((ishift_temp = (x)) < 0 ? \

   149      (ishift_temp >> (shft)) | ((~((DCTELEM) 0)) << (DCTELEMBITS-(shft))) : \

   150      (ishift_temp >> (shft)))

   151 #else

   152 #define ISHIFT_TEMPS

   153 #define IRIGHT_SHIFT(x,shft)	((x) >> (shft))

   154 #endif

   156 #ifdef USE_ACCURATE_ROUNDING

   157 #define IDESCALE(x,n)  ((int) IRIGHT_SHIFT((x) + (1 << ((n)-1)), n))

   158 #else

   159 #define IDESCALE(x,n)  ((int) IRIGHT_SHIFT(x, n))

   160 #endif

   163 /*

   164  * Perform dequantization and inverse DCT on one block of coefficients.

   165  */

   167 GLOBAL(void)

   168 jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,

   169 		 JCOEFPTR coef_block,

   170 		 JSAMPARRAY output_buf, JDIMENSION output_col)

   171 {

   172   DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;

   173   DCTELEM tmp10, tmp11, tmp12, tmp13;

   174   DCTELEM z5, z10, z11, z12, z13;

   175   JCOEFPTR inptr;

   176   IFAST_MULT_TYPE * quantptr;

   177   int * wsptr;

   178   JSAMPROW outptr;

   179   JSAMPLE *range_limit = IDCT_range_limit(cinfo);

   180   int ctr;

   181   int workspace[DCTSIZE2];	/* buffers data between passes */

   182   SHIFT_TEMPS			/* for DESCALE */

   183   ISHIFT_TEMPS			/* for IDESCALE */

   185   /* Pass 1: process columns from input, store into work array. */

   187   inptr = coef_block;

   188   quantptr = (IFAST_MULT_TYPE *) compptr->dct_table;

   189   wsptr = workspace;

   190   for (ctr = DCTSIZE; ctr > 0; ctr--) {

   191     /* Due to quantization, we will usually find that many of the input

   192      * coefficients are zero, especially the AC terms.  We can exploit this

   193      * by short-circuiting the IDCT calculation for any column in which all

   194      * the AC terms are zero.  In that case each output is equal to the

   195      * DC coefficient (with scale factor as needed).

   196      * With typical images and quantization tables, half or more of the

   197      * column DCT calculations can be simplified this way.

   198      */

   200     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&

   201 	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&

   202 	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&

   203 	inptr[DCTSIZE*7] == 0) {

   204       /* AC terms all zero */

   205       int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);

   207       wsptr[DCTSIZE*0] = dcval;

   208       wsptr[DCTSIZE*1] = dcval;

   209       wsptr[DCTSIZE*2] = dcval;

   210       wsptr[DCTSIZE*3] = dcval;

   211       wsptr[DCTSIZE*4] = dcval;

   212       wsptr[DCTSIZE*5] = dcval;

   213       wsptr[DCTSIZE*6] = dcval;

   214       wsptr[DCTSIZE*7] = dcval;

   216       inptr++;			/* advance pointers to next column */

   217       quantptr++;

   218       wsptr++;

   219       continue;

   220     }

   222     /* Even part */

   224     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);

   225     tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);

   226     tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);

   227     tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);

   229     tmp10 = tmp0 + tmp2;	/* phase 3 */

   230     tmp11 = tmp0 - tmp2;

   232     tmp13 = tmp1 + tmp3;	/* phases 5-3 */

   233     tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */

   235     tmp0 = tmp10 + tmp13;	/* phase 2 */

   236     tmp3 = tmp10 - tmp13;

   237     tmp1 = tmp11 + tmp12;

   238     tmp2 = tmp11 - tmp12;

   240     /* Odd part */

   242     tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);

   243     tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);

   244     tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);

   245     tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);

   247     z13 = tmp6 + tmp5;		/* phase 6 */

   248     z10 = tmp6 - tmp5;

   249     z11 = tmp4 + tmp7;

   250     z12 = tmp4 - tmp7;

   252     tmp7 = z11 + z13;		/* phase 5 */

   253     tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */

   255     z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */

   256     tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */

   257     tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */

   259     tmp6 = tmp12 - tmp7;	/* phase 2 */

   260     tmp5 = tmp11 - tmp6;

   261     tmp4 = tmp10 + tmp5;

   263     wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);

   264     wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);

   265     wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);

   266     wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);

   267     wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5);

   268     wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);

   269     wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);

   270     wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);

   272     inptr++;			/* advance pointers to next column */

   273     quantptr++;

   274     wsptr++;

   275   }

   277   /* Pass 2: process rows from work array, store into output array. */

   278   /* Note that we must descale the results by a factor of 8 == 2**3, */

   279   /* and also undo the PASS1_BITS scaling. */

   281   wsptr = workspace;

   282   for (ctr = 0; ctr < DCTSIZE; ctr++) {

   283     outptr = output_buf[ctr] + output_col;

   284     /* Rows of zeroes can be exploited in the same way as we did with columns.

   285      * However, the column calculation has created many nonzero AC terms, so

   286      * the simplification applies less often (typically 5% to 10% of the time).

   287      * On machines with very fast multiplication, it's possible that the

   288      * test takes more time than it's worth.  In that case this section

   289      * may be commented out.

   290      */

   292 #ifndef NO_ZERO_ROW_TEST

   293     if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&

   294 	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {

   295       /* AC terms all zero */

   296       JSAMPLE dcval = range_limit[IDESCALE(wsptr[0], PASS1_BITS+3)

   297 				  & RANGE_MASK];

   299       outptr[0] = dcval;

   300       outptr[1] = dcval;

   301       outptr[2] = dcval;

   302       outptr[3] = dcval;

   303       outptr[4] = dcval;

   304       outptr[5] = dcval;

   305       outptr[6] = dcval;

   306       outptr[7] = dcval;

   308       wsptr += DCTSIZE;		/* advance pointer to next row */

   309       continue;

   310     }

   311 #endif

   313     /* Even part */

   315     tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);

   316     tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]);

   318     tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);

   319     tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6], FIX_1_414213562)

   320 	    - tmp13;

   322     tmp0 = tmp10 + tmp13;

   323     tmp3 = tmp10 - tmp13;

   324     tmp1 = tmp11 + tmp12;

   325     tmp2 = tmp11 - tmp12;

   327     /* Odd part */

   329     z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];

   330     z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];

   331     z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];

   332     z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];

   334     tmp7 = z11 + z13;		/* phase 5 */

   335     tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */

   337     z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */

   338     tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */

   339     tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */

   341     tmp6 = tmp12 - tmp7;	/* phase 2 */

   342     tmp5 = tmp11 - tmp6;

   343     tmp4 = tmp10 + tmp5;

   345     /* Final output stage: scale down by a factor of 8 and range-limit */

   347     outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3)

   348 			    & RANGE_MASK];

   349     outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3)

   350 			    & RANGE_MASK];

   351     outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3)

   352 			    & RANGE_MASK];

   353     outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)

   354 			    & RANGE_MASK];

   355     outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)

   356 			    & RANGE_MASK];

   357     outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)

   358 			    & RANGE_MASK];

   359     outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)

   360 			    & RANGE_MASK];

   361     outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)

   362 			    & RANGE_MASK];

   364     wsptr += DCTSIZE;		/* advance pointer to next row */

   365   }

   366 }

   368 #endif /* DCT_IFAST_SUPPORTED */

The Tor Browser / file revision

media/libjpeg/jidctfst.c@6474c204b198

media/libjpeg/jidctfst.c