media/libjpeg/jcdctmgr.c

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /*
     2  * jcdctmgr.c
     3  *
     4  * This file was part of the Independent JPEG Group's software:
     5  * Copyright (C) 1994-1996, Thomas G. Lane.
     6  * libjpeg-turbo Modifications:
     7  * Copyright (C) 1999-2006, MIYASAKA Masaru.
     8  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
     9  * Copyright (C) 2011 D. R. Commander
    10  * For conditions of distribution and use, see the accompanying README file.
    11  *
    12  * This file contains the forward-DCT management logic.
    13  * This code selects a particular DCT implementation to be used,
    14  * and it performs related housekeeping chores including coefficient
    15  * quantization.
    16  */
    18 #define JPEG_INTERNALS
    19 #include "jinclude.h"
    20 #include "jpeglib.h"
    21 #include "jdct.h"		/* Private declarations for DCT subsystem */
    22 #include "jsimddct.h"
    25 /* Private subobject for this module */
    27 typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
    28 typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
    30 typedef JMETHOD(void, convsamp_method_ptr,
    31                 (JSAMPARRAY sample_data, JDIMENSION start_col,
    32                  DCTELEM * workspace));
    33 typedef JMETHOD(void, float_convsamp_method_ptr,
    34                 (JSAMPARRAY sample_data, JDIMENSION start_col,
    35                  FAST_FLOAT *workspace));
    37 typedef JMETHOD(void, quantize_method_ptr,
    38                 (JCOEFPTR coef_block, DCTELEM * divisors,
    39                  DCTELEM * workspace));
    40 typedef JMETHOD(void, float_quantize_method_ptr,
    41                 (JCOEFPTR coef_block, FAST_FLOAT * divisors,
    42                  FAST_FLOAT * workspace));
    44 METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *);
    46 typedef struct {
    47   struct jpeg_forward_dct pub;	/* public fields */
    49   /* Pointer to the DCT routine actually in use */
    50   forward_DCT_method_ptr dct;
    51   convsamp_method_ptr convsamp;
    52   quantize_method_ptr quantize;
    54   /* The actual post-DCT divisors --- not identical to the quant table
    55    * entries, because of scaling (especially for an unnormalized DCT).
    56    * Each table is given in normal array order.
    57    */
    58   DCTELEM * divisors[NUM_QUANT_TBLS];
    60   /* work area for FDCT subroutine */
    61   DCTELEM * workspace;
    63 #ifdef DCT_FLOAT_SUPPORTED
    64   /* Same as above for the floating-point case. */
    65   float_DCT_method_ptr float_dct;
    66   float_convsamp_method_ptr float_convsamp;
    67   float_quantize_method_ptr float_quantize;
    68   FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
    69   FAST_FLOAT * float_workspace;
    70 #endif
    71 } my_fdct_controller;
    73 typedef my_fdct_controller * my_fdct_ptr;
    76 /*
    77  * Find the highest bit in an integer through binary search.
    78  */
    79 LOCAL(int)
    80 flss (UINT16 val)
    81 {
    82   int bit;
    84   bit = 16;
    86   if (!val)
    87     return 0;
    89   if (!(val & 0xff00)) {
    90     bit -= 8;
    91     val <<= 8;
    92   }
    93   if (!(val & 0xf000)) {
    94     bit -= 4;
    95     val <<= 4;
    96   }
    97   if (!(val & 0xc000)) {
    98     bit -= 2;
    99     val <<= 2;
   100   }
   101   if (!(val & 0x8000)) {
   102     bit -= 1;
   103     val <<= 1;
   104   }
   106   return bit;
   107 }
   109 /*
   110  * Compute values to do a division using reciprocal.
   111  *
   112  * This implementation is based on an algorithm described in
   113  *   "How to optimize for the Pentium family of microprocessors"
   114  *   (http://www.agner.org/assem/).
   115  * More information about the basic algorithm can be found in
   116  * the paper "Integer Division Using Reciprocals" by Robert Alverson.
   117  *
   118  * The basic idea is to replace x/d by x * d^-1. In order to store
   119  * d^-1 with enough precision we shift it left a few places. It turns
   120  * out that this algorithm gives just enough precision, and also fits
   121  * into DCTELEM:
   122  *
   123  *   b = (the number of significant bits in divisor) - 1
   124  *   r = (word size) + b
   125  *   f = 2^r / divisor
   126  *
   127  * f will not be an integer for most cases, so we need to compensate
   128  * for the rounding error introduced:
   129  *
   130  *   no fractional part:
   131  *
   132  *       result = input >> r
   133  *
   134  *   fractional part of f < 0.5:
   135  *
   136  *       round f down to nearest integer
   137  *       result = ((input + 1) * f) >> r
   138  *
   139  *   fractional part of f > 0.5:
   140  *
   141  *       round f up to nearest integer
   142  *       result = (input * f) >> r
   143  *
   144  * This is the original algorithm that gives truncated results. But we
   145  * want properly rounded results, so we replace "input" with
   146  * "input + divisor/2".
   147  *
   148  * In order to allow SIMD implementations we also tweak the values to
   149  * allow the same calculation to be made at all times:
   150  * 
   151  *   dtbl[0] = f rounded to nearest integer
   152  *   dtbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
   153  *   dtbl[2] = 1 << ((word size) * 2 - r)
   154  *   dtbl[3] = r - (word size)
   155  *
   156  * (Here dtbl[k] denotes the entry in the k-th sub-table, see below.)
   157  * dtbl[2] is for stupid instruction sets where the shift operation
   158  * isn't member wise (e.g. MMX).
   159  *
   160  * The reason dtbl[2] and dtbl[3] reduce the shift with (word size)
   162  *
   163  * Lastly, we store each of the values in their own table instead
   164  * of in a consecutive manner, yet again in order to allow SIMD
   165  * routines.
   166  */
   167 LOCAL(int)
   168 compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
   169 {
   170   UDCTELEM2 fq, fr;
   171   UDCTELEM c;
   172   int b, r;
   174   b = flss(divisor) - 1;
   175   r  = sizeof(DCTELEM) * 8 + b;
   177   fq = ((UDCTELEM2)1 << r) / divisor;
   178   fr = ((UDCTELEM2)1 << r) % divisor;
   180   c = divisor / 2; /* for rounding */
   182   if (fr == 0) { /* divisor is power of two */
   183     /* fq will be one bit too large to fit in DCTELEM, so adjust */
   184     fq >>= 1;
   185     r--;
   186   } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */
   187     c++;
   188   } else { /* fractional part is > 0.5 */
   189     fq++;
   190   }
   192   dtbl[DCTSIZE2 * 0] = (DCTELEM) fq;      /* reciprocal */
   193   dtbl[DCTSIZE2 * 1] = (DCTELEM) c;       /* correction + roundfactor */
   194   dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r));  /* scale */
   195   dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */
   197   if(r <= 16) return 0;
   198   else return 1;
   199 }
   201 /*
   202  * Initialize for a processing pass.
   203  * Verify that all referenced Q-tables are present, and set up
   204  * the divisor table for each one.
   205  * In the current implementation, DCT of all components is done during
   206  * the first pass, even if only some components will be output in the
   207  * first scan.  Hence all components should be examined here.
   208  */
   210 METHODDEF(void)
   211 start_pass_fdctmgr (j_compress_ptr cinfo)
   212 {
   213   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
   214   int ci, qtblno, i;
   215   jpeg_component_info *compptr;
   216   JQUANT_TBL * qtbl;
   217   DCTELEM * dtbl;
   219   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
   220        ci++, compptr++) {
   221     qtblno = compptr->quant_tbl_no;
   222     /* Make sure specified quantization table is present */
   223     if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
   224 	cinfo->quant_tbl_ptrs[qtblno] == NULL)
   225       ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
   226     qtbl = cinfo->quant_tbl_ptrs[qtblno];
   227     /* Compute divisors for this quant table */
   228     /* We may do this more than once for same table, but it's not a big deal */
   229     switch (cinfo->dct_method) {
   230 #ifdef DCT_ISLOW_SUPPORTED
   231     case JDCT_ISLOW:
   232       /* For LL&M IDCT method, divisors are equal to raw quantization
   233        * coefficients multiplied by 8 (to counteract scaling).
   234        */
   235       if (fdct->divisors[qtblno] == NULL) {
   236 	fdct->divisors[qtblno] = (DCTELEM *)
   237 	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
   238 				      (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
   239       }
   240       dtbl = fdct->divisors[qtblno];
   241       for (i = 0; i < DCTSIZE2; i++) {
   242 	if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
   243 	  && fdct->quantize == jsimd_quantize)
   244 	  fdct->quantize = quantize;
   245       }
   246       break;
   247 #endif
   248 #ifdef DCT_IFAST_SUPPORTED
   249     case JDCT_IFAST:
   250       {
   251 	/* For AA&N IDCT method, divisors are equal to quantization
   252 	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
   253 	 *   scalefactor[0] = 1
   254 	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
   255 	 * We apply a further scale factor of 8.
   256 	 */
   257 #define CONST_BITS 14
   258 	static const INT16 aanscales[DCTSIZE2] = {
   259 	  /* precomputed values scaled up by 14 bits */
   260 	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
   261 	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
   262 	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
   263 	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
   264 	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
   265 	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
   266 	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
   267 	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
   268 	};
   269 	SHIFT_TEMPS
   271 	if (fdct->divisors[qtblno] == NULL) {
   272 	  fdct->divisors[qtblno] = (DCTELEM *)
   273 	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
   274 					(DCTSIZE2 * 4) * SIZEOF(DCTELEM));
   275 	}
   276 	dtbl = fdct->divisors[qtblno];
   277 	for (i = 0; i < DCTSIZE2; i++) {
   278 	  if(!compute_reciprocal(
   279 	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
   280 				  (INT32) aanscales[i]),
   281 		    CONST_BITS-3), &dtbl[i])
   282 	    && fdct->quantize == jsimd_quantize)
   283 	    fdct->quantize = quantize;
   284 	}
   285       }
   286       break;
   287 #endif
   288 #ifdef DCT_FLOAT_SUPPORTED
   289     case JDCT_FLOAT:
   290       {
   291 	/* For float AA&N IDCT method, divisors are equal to quantization
   292 	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
   293 	 *   scalefactor[0] = 1
   294 	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
   295 	 * We apply a further scale factor of 8.
   296 	 * What's actually stored is 1/divisor so that the inner loop can
   297 	 * use a multiplication rather than a division.
   298 	 */
   299 	FAST_FLOAT * fdtbl;
   300 	int row, col;
   301 	static const double aanscalefactor[DCTSIZE] = {
   302 	  1.0, 1.387039845, 1.306562965, 1.175875602,
   303 	  1.0, 0.785694958, 0.541196100, 0.275899379
   304 	};
   306 	if (fdct->float_divisors[qtblno] == NULL) {
   307 	  fdct->float_divisors[qtblno] = (FAST_FLOAT *)
   308 	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
   309 					DCTSIZE2 * SIZEOF(FAST_FLOAT));
   310 	}
   311 	fdtbl = fdct->float_divisors[qtblno];
   312 	i = 0;
   313 	for (row = 0; row < DCTSIZE; row++) {
   314 	  for (col = 0; col < DCTSIZE; col++) {
   315 	    fdtbl[i] = (FAST_FLOAT)
   316 	      (1.0 / (((double) qtbl->quantval[i] *
   317 		       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
   318 	    i++;
   319 	  }
   320 	}
   321       }
   322       break;
   323 #endif
   324     default:
   325       ERREXIT(cinfo, JERR_NOT_COMPILED);
   326       break;
   327     }
   328   }
   329 }
   332 /*
   333  * Load data into workspace, applying unsigned->signed conversion.
   334  */
   336 METHODDEF(void)
   337 convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
   338 {
   339   register DCTELEM *workspaceptr;
   340   register JSAMPROW elemptr;
   341   register int elemr;
   343   workspaceptr = workspace;
   344   for (elemr = 0; elemr < DCTSIZE; elemr++) {
   345     elemptr = sample_data[elemr] + start_col;
   347 #if DCTSIZE == 8		/* unroll the inner loop */
   348     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
   349     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
   350     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
   351     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
   352     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
   353     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
   354     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
   355     *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
   356 #else
   357     {
   358       register int elemc;
   359       for (elemc = DCTSIZE; elemc > 0; elemc--)
   360         *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
   361     }
   362 #endif
   363   }
   364 }
   367 /*
   368  * Quantize/descale the coefficients, and store into coef_blocks[].
   369  */
   371 METHODDEF(void)
   372 quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
   373 {
   374   int i;
   375   DCTELEM temp;
   376   UDCTELEM recip, corr, shift;
   377   UDCTELEM2 product;
   378   JCOEFPTR output_ptr = coef_block;
   380   for (i = 0; i < DCTSIZE2; i++) {
   381     temp = workspace[i];
   382     recip = divisors[i + DCTSIZE2 * 0];
   383     corr =  divisors[i + DCTSIZE2 * 1];
   384     shift = divisors[i + DCTSIZE2 * 3];
   386     if (temp < 0) {
   387       temp = -temp;
   388       product = (UDCTELEM2)(temp + corr) * recip;
   389       product >>= shift + sizeof(DCTELEM)*8;
   390       temp = product;
   391       temp = -temp;
   392     } else {
   393       product = (UDCTELEM2)(temp + corr) * recip;
   394       product >>= shift + sizeof(DCTELEM)*8;
   395       temp = product;
   396     }
   398     output_ptr[i] = (JCOEF) temp;
   399   }
   400 }
   403 /*
   404  * Perform forward DCT on one or more blocks of a component.
   405  *
   406  * The input samples are taken from the sample_data[] array starting at
   407  * position start_row/start_col, and moving to the right for any additional
   408  * blocks. The quantized coefficients are returned in coef_blocks[].
   409  */
   411 METHODDEF(void)
   412 forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
   413 	     JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
   414 	     JDIMENSION start_row, JDIMENSION start_col,
   415 	     JDIMENSION num_blocks)
   416 /* This version is used for integer DCT implementations. */
   417 {
   418   /* This routine is heavily used, so it's worth coding it tightly. */
   419   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
   420   DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
   421   DCTELEM * workspace;
   422   JDIMENSION bi;
   424   /* Make sure the compiler doesn't look up these every pass */
   425   forward_DCT_method_ptr do_dct = fdct->dct;
   426   convsamp_method_ptr do_convsamp = fdct->convsamp;
   427   quantize_method_ptr do_quantize = fdct->quantize;
   428   workspace = fdct->workspace;
   430   sample_data += start_row;	/* fold in the vertical offset once */
   432   for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
   433     /* Load data into workspace, applying unsigned->signed conversion */
   434     (*do_convsamp) (sample_data, start_col, workspace);
   436     /* Perform the DCT */
   437     (*do_dct) (workspace);
   439     /* Quantize/descale the coefficients, and store into coef_blocks[] */
   440     (*do_quantize) (coef_blocks[bi], divisors, workspace);
   441   }
   442 }
   445 #ifdef DCT_FLOAT_SUPPORTED
   448 METHODDEF(void)
   449 convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
   450 {
   451   register FAST_FLOAT *workspaceptr;
   452   register JSAMPROW elemptr;
   453   register int elemr;
   455   workspaceptr = workspace;
   456   for (elemr = 0; elemr < DCTSIZE; elemr++) {
   457     elemptr = sample_data[elemr] + start_col;
   458 #if DCTSIZE == 8		/* unroll the inner loop */
   459     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
   460     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
   461     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
   462     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
   463     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
   464     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
   465     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
   466     *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
   467 #else
   468     {
   469       register int elemc;
   470       for (elemc = DCTSIZE; elemc > 0; elemc--)
   471         *workspaceptr++ = (FAST_FLOAT)
   472                           (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
   473     }
   474 #endif
   475   }
   476 }
   479 METHODDEF(void)
   480 quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
   481 {
   482   register FAST_FLOAT temp;
   483   register int i;
   484   register JCOEFPTR output_ptr = coef_block;
   486   for (i = 0; i < DCTSIZE2; i++) {
   487     /* Apply the quantization and scaling factor */
   488     temp = workspace[i] * divisors[i];
   490     /* Round to nearest integer.
   491      * Since C does not specify the direction of rounding for negative
   492      * quotients, we have to force the dividend positive for portability.
   493      * The maximum coefficient size is +-16K (for 12-bit data), so this
   494      * code should work for either 16-bit or 32-bit ints.
   495      */
   496     output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
   497   }
   498 }
   501 METHODDEF(void)
   502 forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
   503 		   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
   504 		   JDIMENSION start_row, JDIMENSION start_col,
   505 		   JDIMENSION num_blocks)
   506 /* This version is used for floating-point DCT implementations. */
   507 {
   508   /* This routine is heavily used, so it's worth coding it tightly. */
   509   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
   510   FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
   511   FAST_FLOAT * workspace;
   512   JDIMENSION bi;
   515   /* Make sure the compiler doesn't look up these every pass */
   516   float_DCT_method_ptr do_dct = fdct->float_dct;
   517   float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
   518   float_quantize_method_ptr do_quantize = fdct->float_quantize;
   519   workspace = fdct->float_workspace;
   521   sample_data += start_row;	/* fold in the vertical offset once */
   523   for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
   524     /* Load data into workspace, applying unsigned->signed conversion */
   525     (*do_convsamp) (sample_data, start_col, workspace);
   527     /* Perform the DCT */
   528     (*do_dct) (workspace);
   530     /* Quantize/descale the coefficients, and store into coef_blocks[] */
   531     (*do_quantize) (coef_blocks[bi], divisors, workspace);
   532   }
   533 }
   535 #endif /* DCT_FLOAT_SUPPORTED */
   538 /*
   539  * Initialize FDCT manager.
   540  */
   542 GLOBAL(void)
   543 jinit_forward_dct (j_compress_ptr cinfo)
   544 {
   545   my_fdct_ptr fdct;
   546   int i;
   548   fdct = (my_fdct_ptr)
   549     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
   550 				SIZEOF(my_fdct_controller));
   551   cinfo->fdct = (struct jpeg_forward_dct *) fdct;
   552   fdct->pub.start_pass = start_pass_fdctmgr;
   554   /* First determine the DCT... */
   555   switch (cinfo->dct_method) {
   556 #ifdef DCT_ISLOW_SUPPORTED
   557   case JDCT_ISLOW:
   558     fdct->pub.forward_DCT = forward_DCT;
   559     if (jsimd_can_fdct_islow())
   560       fdct->dct = jsimd_fdct_islow;
   561     else
   562       fdct->dct = jpeg_fdct_islow;
   563     break;
   564 #endif
   565 #ifdef DCT_IFAST_SUPPORTED
   566   case JDCT_IFAST:
   567     fdct->pub.forward_DCT = forward_DCT;
   568     if (jsimd_can_fdct_ifast())
   569       fdct->dct = jsimd_fdct_ifast;
   570     else
   571       fdct->dct = jpeg_fdct_ifast;
   572     break;
   573 #endif
   574 #ifdef DCT_FLOAT_SUPPORTED
   575   case JDCT_FLOAT:
   576     fdct->pub.forward_DCT = forward_DCT_float;
   577     if (jsimd_can_fdct_float())
   578       fdct->float_dct = jsimd_fdct_float;
   579     else
   580       fdct->float_dct = jpeg_fdct_float;
   581     break;
   582 #endif
   583   default:
   584     ERREXIT(cinfo, JERR_NOT_COMPILED);
   585     break;
   586   }
   588   /* ...then the supporting stages. */
   589   switch (cinfo->dct_method) {
   590 #ifdef DCT_ISLOW_SUPPORTED
   591   case JDCT_ISLOW:
   592 #endif
   593 #ifdef DCT_IFAST_SUPPORTED
   594   case JDCT_IFAST:
   595 #endif
   596 #if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
   597     if (jsimd_can_convsamp())
   598       fdct->convsamp = jsimd_convsamp;
   599     else
   600       fdct->convsamp = convsamp;
   601     if (jsimd_can_quantize())
   602       fdct->quantize = jsimd_quantize;
   603     else
   604       fdct->quantize = quantize;
   605     break;
   606 #endif
   607 #ifdef DCT_FLOAT_SUPPORTED
   608   case JDCT_FLOAT:
   609     if (jsimd_can_convsamp_float())
   610       fdct->float_convsamp = jsimd_convsamp_float;
   611     else
   612       fdct->float_convsamp = convsamp_float;
   613     if (jsimd_can_quantize_float())
   614       fdct->float_quantize = jsimd_quantize_float;
   615     else
   616       fdct->float_quantize = quantize_float;
   617     break;
   618 #endif
   619   default:
   620     ERREXIT(cinfo, JERR_NOT_COMPILED);
   621     break;
   622   }
   624   /* Allocate workspace memory */
   625 #ifdef DCT_FLOAT_SUPPORTED
   626   if (cinfo->dct_method == JDCT_FLOAT)
   627     fdct->float_workspace = (FAST_FLOAT *)
   628       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
   629 				  SIZEOF(FAST_FLOAT) * DCTSIZE2);
   630   else
   631 #endif
   632     fdct->workspace = (DCTELEM *)
   633       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
   634 				  SIZEOF(DCTELEM) * DCTSIZE2);
   636   /* Mark divisor tables unallocated */
   637   for (i = 0; i < NUM_QUANT_TBLS; i++) {
   638     fdct->divisors[i] = NULL;
   639 #ifdef DCT_FLOAT_SUPPORTED
   640     fdct->float_divisors[i] = NULL;
   641 #endif
   642   }
   643 }

mercurial