michael@0: /* michael@0: * jcdctmgr.c michael@0: * michael@0: * This file was part of the Independent JPEG Group's software: michael@0: * Copyright (C) 1994-1996, Thomas G. Lane. michael@0: * libjpeg-turbo Modifications: michael@0: * Copyright (C) 1999-2006, MIYASAKA Masaru. michael@0: * Copyright 2009 Pierre Ossman for Cendio AB michael@0: * Copyright (C) 2011 D. R. Commander michael@0: * For conditions of distribution and use, see the accompanying README file. michael@0: * michael@0: * This file contains the forward-DCT management logic. michael@0: * This code selects a particular DCT implementation to be used, michael@0: * and it performs related housekeeping chores including coefficient michael@0: * quantization. michael@0: */ michael@0: michael@0: #define JPEG_INTERNALS michael@0: #include "jinclude.h" michael@0: #include "jpeglib.h" michael@0: #include "jdct.h" /* Private declarations for DCT subsystem */ michael@0: #include "jsimddct.h" michael@0: michael@0: michael@0: /* Private subobject for this module */ michael@0: michael@0: typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data)); michael@0: typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data)); michael@0: michael@0: typedef JMETHOD(void, convsamp_method_ptr, michael@0: (JSAMPARRAY sample_data, JDIMENSION start_col, michael@0: DCTELEM * workspace)); michael@0: typedef JMETHOD(void, float_convsamp_method_ptr, michael@0: (JSAMPARRAY sample_data, JDIMENSION start_col, michael@0: FAST_FLOAT *workspace)); michael@0: michael@0: typedef JMETHOD(void, quantize_method_ptr, michael@0: (JCOEFPTR coef_block, DCTELEM * divisors, michael@0: DCTELEM * workspace)); michael@0: typedef JMETHOD(void, float_quantize_method_ptr, michael@0: (JCOEFPTR coef_block, FAST_FLOAT * divisors, michael@0: FAST_FLOAT * workspace)); michael@0: michael@0: METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *); michael@0: michael@0: typedef struct { michael@0: struct jpeg_forward_dct pub; /* public fields */ michael@0: michael@0: /* Pointer to the DCT routine actually in use */ michael@0: forward_DCT_method_ptr dct; michael@0: convsamp_method_ptr convsamp; michael@0: quantize_method_ptr quantize; michael@0: michael@0: /* The actual post-DCT divisors --- not identical to the quant table michael@0: * entries, because of scaling (especially for an unnormalized DCT). michael@0: * Each table is given in normal array order. michael@0: */ michael@0: DCTELEM * divisors[NUM_QUANT_TBLS]; michael@0: michael@0: /* work area for FDCT subroutine */ michael@0: DCTELEM * workspace; michael@0: michael@0: #ifdef DCT_FLOAT_SUPPORTED michael@0: /* Same as above for the floating-point case. */ michael@0: float_DCT_method_ptr float_dct; michael@0: float_convsamp_method_ptr float_convsamp; michael@0: float_quantize_method_ptr float_quantize; michael@0: FAST_FLOAT * float_divisors[NUM_QUANT_TBLS]; michael@0: FAST_FLOAT * float_workspace; michael@0: #endif michael@0: } my_fdct_controller; michael@0: michael@0: typedef my_fdct_controller * my_fdct_ptr; michael@0: michael@0: michael@0: /* michael@0: * Find the highest bit in an integer through binary search. michael@0: */ michael@0: LOCAL(int) michael@0: flss (UINT16 val) michael@0: { michael@0: int bit; michael@0: michael@0: bit = 16; michael@0: michael@0: if (!val) michael@0: return 0; michael@0: michael@0: if (!(val & 0xff00)) { michael@0: bit -= 8; michael@0: val <<= 8; michael@0: } michael@0: if (!(val & 0xf000)) { michael@0: bit -= 4; michael@0: val <<= 4; michael@0: } michael@0: if (!(val & 0xc000)) { michael@0: bit -= 2; michael@0: val <<= 2; michael@0: } michael@0: if (!(val & 0x8000)) { michael@0: bit -= 1; michael@0: val <<= 1; michael@0: } michael@0: michael@0: return bit; michael@0: } michael@0: michael@0: /* michael@0: * Compute values to do a division using reciprocal. michael@0: * michael@0: * This implementation is based on an algorithm described in michael@0: * "How to optimize for the Pentium family of microprocessors" michael@0: * (http://www.agner.org/assem/). michael@0: * More information about the basic algorithm can be found in michael@0: * the paper "Integer Division Using Reciprocals" by Robert Alverson. michael@0: * michael@0: * The basic idea is to replace x/d by x * d^-1. In order to store michael@0: * d^-1 with enough precision we shift it left a few places. It turns michael@0: * out that this algoright gives just enough precision, and also fits michael@0: * into DCTELEM: michael@0: * michael@0: * b = (the number of significant bits in divisor) - 1 michael@0: * r = (word size) + b michael@0: * f = 2^r / divisor michael@0: * michael@0: * f will not be an integer for most cases, so we need to compensate michael@0: * for the rounding error introduced: michael@0: * michael@0: * no fractional part: michael@0: * michael@0: * result = input >> r michael@0: * michael@0: * fractional part of f < 0.5: michael@0: * michael@0: * round f down to nearest integer michael@0: * result = ((input + 1) * f) >> r michael@0: * michael@0: * fractional part of f > 0.5: michael@0: * michael@0: * round f up to nearest integer michael@0: * result = (input * f) >> r michael@0: * michael@0: * This is the original algorithm that gives truncated results. But we michael@0: * want properly rounded results, so we replace "input" with michael@0: * "input + divisor/2". michael@0: * michael@0: * In order to allow SIMD implementations we also tweak the values to michael@0: * allow the same calculation to be made at all times: michael@0: * michael@0: * dctbl[0] = f rounded to nearest integer michael@0: * dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5) michael@0: * dctbl[2] = 1 << ((word size) * 2 - r) michael@0: * dctbl[3] = r - (word size) michael@0: * michael@0: * dctbl[2] is for stupid instruction sets where the shift operation michael@0: * isn't member wise (e.g. MMX). michael@0: * michael@0: * The reason dctbl[2] and dctbl[3] reduce the shift with (word size) michael@0: * is that most SIMD implementations have a "multiply and store top michael@0: * half" operation. michael@0: * michael@0: * Lastly, we store each of the values in their own table instead michael@0: * of in a consecutive manner, yet again in order to allow SIMD michael@0: * routines. michael@0: */ michael@0: LOCAL(int) michael@0: compute_reciprocal (UINT16 divisor, DCTELEM * dtbl) michael@0: { michael@0: UDCTELEM2 fq, fr; michael@0: UDCTELEM c; michael@0: int b, r; michael@0: michael@0: b = flss(divisor) - 1; michael@0: r = sizeof(DCTELEM) * 8 + b; michael@0: michael@0: fq = ((UDCTELEM2)1 << r) / divisor; michael@0: fr = ((UDCTELEM2)1 << r) % divisor; michael@0: michael@0: c = divisor / 2; /* for rounding */ michael@0: michael@0: if (fr == 0) { /* divisor is power of two */ michael@0: /* fq will be one bit too large to fit in DCTELEM, so adjust */ michael@0: fq >>= 1; michael@0: r--; michael@0: } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */ michael@0: c++; michael@0: } else { /* fractional part is > 0.5 */ michael@0: fq++; michael@0: } michael@0: michael@0: dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */ michael@0: dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */ michael@0: dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */ michael@0: dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */ michael@0: michael@0: if(r <= 16) return 0; michael@0: else return 1; michael@0: } michael@0: michael@0: /* michael@0: * Initialize for a processing pass. michael@0: * Verify that all referenced Q-tables are present, and set up michael@0: * the divisor table for each one. michael@0: * In the current implementation, DCT of all components is done during michael@0: * the first pass, even if only some components will be output in the michael@0: * first scan. Hence all components should be examined here. michael@0: */ michael@0: michael@0: METHODDEF(void) michael@0: start_pass_fdctmgr (j_compress_ptr cinfo) michael@0: { michael@0: my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; michael@0: int ci, qtblno, i; michael@0: jpeg_component_info *compptr; michael@0: JQUANT_TBL * qtbl; michael@0: DCTELEM * dtbl; michael@0: michael@0: for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; michael@0: ci++, compptr++) { michael@0: qtblno = compptr->quant_tbl_no; michael@0: /* Make sure specified quantization table is present */ michael@0: if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS || michael@0: cinfo->quant_tbl_ptrs[qtblno] == NULL) michael@0: ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno); michael@0: qtbl = cinfo->quant_tbl_ptrs[qtblno]; michael@0: /* Compute divisors for this quant table */ michael@0: /* We may do this more than once for same table, but it's not a big deal */ michael@0: switch (cinfo->dct_method) { michael@0: #ifdef DCT_ISLOW_SUPPORTED michael@0: case JDCT_ISLOW: michael@0: /* For LL&M IDCT method, divisors are equal to raw quantization michael@0: * coefficients multiplied by 8 (to counteract scaling). michael@0: */ michael@0: if (fdct->divisors[qtblno] == NULL) { michael@0: fdct->divisors[qtblno] = (DCTELEM *) michael@0: (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, michael@0: (DCTSIZE2 * 4) * SIZEOF(DCTELEM)); michael@0: } michael@0: dtbl = fdct->divisors[qtblno]; michael@0: for (i = 0; i < DCTSIZE2; i++) { michael@0: if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) michael@0: && fdct->quantize == jsimd_quantize) michael@0: fdct->quantize = quantize; michael@0: } michael@0: break; michael@0: #endif michael@0: #ifdef DCT_IFAST_SUPPORTED michael@0: case JDCT_IFAST: michael@0: { michael@0: /* For AA&N IDCT method, divisors are equal to quantization michael@0: * coefficients scaled by scalefactor[row]*scalefactor[col], where michael@0: * scalefactor[0] = 1 michael@0: * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 michael@0: * We apply a further scale factor of 8. michael@0: */ michael@0: #define CONST_BITS 14 michael@0: static const INT16 aanscales[DCTSIZE2] = { michael@0: /* precomputed values scaled up by 14 bits */ michael@0: 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, michael@0: 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, michael@0: 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, michael@0: 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, michael@0: 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, michael@0: 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, michael@0: 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, michael@0: 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 michael@0: }; michael@0: SHIFT_TEMPS michael@0: michael@0: if (fdct->divisors[qtblno] == NULL) { michael@0: fdct->divisors[qtblno] = (DCTELEM *) michael@0: (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, michael@0: (DCTSIZE2 * 4) * SIZEOF(DCTELEM)); michael@0: } michael@0: dtbl = fdct->divisors[qtblno]; michael@0: for (i = 0; i < DCTSIZE2; i++) { michael@0: if(!compute_reciprocal( michael@0: DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i], michael@0: (INT32) aanscales[i]), michael@0: CONST_BITS-3), &dtbl[i]) michael@0: && fdct->quantize == jsimd_quantize) michael@0: fdct->quantize = quantize; michael@0: } michael@0: } michael@0: break; michael@0: #endif michael@0: #ifdef DCT_FLOAT_SUPPORTED michael@0: case JDCT_FLOAT: michael@0: { michael@0: /* For float AA&N IDCT method, divisors are equal to quantization michael@0: * coefficients scaled by scalefactor[row]*scalefactor[col], where michael@0: * scalefactor[0] = 1 michael@0: * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 michael@0: * We apply a further scale factor of 8. michael@0: * What's actually stored is 1/divisor so that the inner loop can michael@0: * use a multiplication rather than a division. michael@0: */ michael@0: FAST_FLOAT * fdtbl; michael@0: int row, col; michael@0: static const double aanscalefactor[DCTSIZE] = { michael@0: 1.0, 1.387039845, 1.306562965, 1.175875602, michael@0: 1.0, 0.785694958, 0.541196100, 0.275899379 michael@0: }; michael@0: michael@0: if (fdct->float_divisors[qtblno] == NULL) { michael@0: fdct->float_divisors[qtblno] = (FAST_FLOAT *) michael@0: (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, michael@0: DCTSIZE2 * SIZEOF(FAST_FLOAT)); michael@0: } michael@0: fdtbl = fdct->float_divisors[qtblno]; michael@0: i = 0; michael@0: for (row = 0; row < DCTSIZE; row++) { michael@0: for (col = 0; col < DCTSIZE; col++) { michael@0: fdtbl[i] = (FAST_FLOAT) michael@0: (1.0 / (((double) qtbl->quantval[i] * michael@0: aanscalefactor[row] * aanscalefactor[col] * 8.0))); michael@0: i++; michael@0: } michael@0: } michael@0: } michael@0: break; michael@0: #endif michael@0: default: michael@0: ERREXIT(cinfo, JERR_NOT_COMPILED); michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: michael@0: michael@0: /* michael@0: * Load data into workspace, applying unsigned->signed conversion. michael@0: */ michael@0: michael@0: METHODDEF(void) michael@0: convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace) michael@0: { michael@0: register DCTELEM *workspaceptr; michael@0: register JSAMPROW elemptr; michael@0: register int elemr; michael@0: michael@0: workspaceptr = workspace; michael@0: for (elemr = 0; elemr < DCTSIZE; elemr++) { michael@0: elemptr = sample_data[elemr] + start_col; michael@0: michael@0: #if DCTSIZE == 8 /* unroll the inner loop */ michael@0: *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; michael@0: *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; michael@0: *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; michael@0: *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; michael@0: *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; michael@0: *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; michael@0: *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; michael@0: *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; michael@0: #else michael@0: { michael@0: register int elemc; michael@0: for (elemc = DCTSIZE; elemc > 0; elemc--) michael@0: *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; michael@0: } michael@0: #endif michael@0: } michael@0: } michael@0: michael@0: michael@0: /* michael@0: * Quantize/descale the coefficients, and store into coef_blocks[]. michael@0: */ michael@0: michael@0: METHODDEF(void) michael@0: quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace) michael@0: { michael@0: int i; michael@0: DCTELEM temp; michael@0: UDCTELEM recip, corr, shift; michael@0: UDCTELEM2 product; michael@0: JCOEFPTR output_ptr = coef_block; michael@0: michael@0: for (i = 0; i < DCTSIZE2; i++) { michael@0: temp = workspace[i]; michael@0: recip = divisors[i + DCTSIZE2 * 0]; michael@0: corr = divisors[i + DCTSIZE2 * 1]; michael@0: shift = divisors[i + DCTSIZE2 * 3]; michael@0: michael@0: if (temp < 0) { michael@0: temp = -temp; michael@0: product = (UDCTELEM2)(temp + corr) * recip; michael@0: product >>= shift + sizeof(DCTELEM)*8; michael@0: temp = product; michael@0: temp = -temp; michael@0: } else { michael@0: product = (UDCTELEM2)(temp + corr) * recip; michael@0: product >>= shift + sizeof(DCTELEM)*8; michael@0: temp = product; michael@0: } michael@0: michael@0: output_ptr[i] = (JCOEF) temp; michael@0: } michael@0: } michael@0: michael@0: michael@0: /* michael@0: * Perform forward DCT on one or more blocks of a component. michael@0: * michael@0: * The input samples are taken from the sample_data[] array starting at michael@0: * position start_row/start_col, and moving to the right for any additional michael@0: * blocks. The quantized coefficients are returned in coef_blocks[]. michael@0: */ michael@0: michael@0: METHODDEF(void) michael@0: forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr, michael@0: JSAMPARRAY sample_data, JBLOCKROW coef_blocks, michael@0: JDIMENSION start_row, JDIMENSION start_col, michael@0: JDIMENSION num_blocks) michael@0: /* This version is used for integer DCT implementations. */ michael@0: { michael@0: /* This routine is heavily used, so it's worth coding it tightly. */ michael@0: my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; michael@0: DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no]; michael@0: DCTELEM * workspace; michael@0: JDIMENSION bi; michael@0: michael@0: /* Make sure the compiler doesn't look up these every pass */ michael@0: forward_DCT_method_ptr do_dct = fdct->dct; michael@0: convsamp_method_ptr do_convsamp = fdct->convsamp; michael@0: quantize_method_ptr do_quantize = fdct->quantize; michael@0: workspace = fdct->workspace; michael@0: michael@0: sample_data += start_row; /* fold in the vertical offset once */ michael@0: michael@0: for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { michael@0: /* Load data into workspace, applying unsigned->signed conversion */ michael@0: (*do_convsamp) (sample_data, start_col, workspace); michael@0: michael@0: /* Perform the DCT */ michael@0: (*do_dct) (workspace); michael@0: michael@0: /* Quantize/descale the coefficients, and store into coef_blocks[] */ michael@0: (*do_quantize) (coef_blocks[bi], divisors, workspace); michael@0: } michael@0: } michael@0: michael@0: michael@0: #ifdef DCT_FLOAT_SUPPORTED michael@0: michael@0: michael@0: METHODDEF(void) michael@0: convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace) michael@0: { michael@0: register FAST_FLOAT *workspaceptr; michael@0: register JSAMPROW elemptr; michael@0: register int elemr; michael@0: michael@0: workspaceptr = workspace; michael@0: for (elemr = 0; elemr < DCTSIZE; elemr++) { michael@0: elemptr = sample_data[elemr] + start_col; michael@0: #if DCTSIZE == 8 /* unroll the inner loop */ michael@0: *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); michael@0: *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); michael@0: *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); michael@0: *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); michael@0: *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); michael@0: *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); michael@0: *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); michael@0: *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); michael@0: #else michael@0: { michael@0: register int elemc; michael@0: for (elemc = DCTSIZE; elemc > 0; elemc--) michael@0: *workspaceptr++ = (FAST_FLOAT) michael@0: (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); michael@0: } michael@0: #endif michael@0: } michael@0: } michael@0: michael@0: michael@0: METHODDEF(void) michael@0: quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace) michael@0: { michael@0: register FAST_FLOAT temp; michael@0: register int i; michael@0: register JCOEFPTR output_ptr = coef_block; michael@0: michael@0: for (i = 0; i < DCTSIZE2; i++) { michael@0: /* Apply the quantization and scaling factor */ michael@0: temp = workspace[i] * divisors[i]; michael@0: michael@0: /* Round to nearest integer. michael@0: * Since C does not specify the direction of rounding for negative michael@0: * quotients, we have to force the dividend positive for portability. michael@0: * The maximum coefficient size is +-16K (for 12-bit data), so this michael@0: * code should work for either 16-bit or 32-bit ints. michael@0: */ michael@0: output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384); michael@0: } michael@0: } michael@0: michael@0: michael@0: METHODDEF(void) michael@0: forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr, michael@0: JSAMPARRAY sample_data, JBLOCKROW coef_blocks, michael@0: JDIMENSION start_row, JDIMENSION start_col, michael@0: JDIMENSION num_blocks) michael@0: /* This version is used for floating-point DCT implementations. */ michael@0: { michael@0: /* This routine is heavily used, so it's worth coding it tightly. */ michael@0: my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; michael@0: FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no]; michael@0: FAST_FLOAT * workspace; michael@0: JDIMENSION bi; michael@0: michael@0: michael@0: /* Make sure the compiler doesn't look up these every pass */ michael@0: float_DCT_method_ptr do_dct = fdct->float_dct; michael@0: float_convsamp_method_ptr do_convsamp = fdct->float_convsamp; michael@0: float_quantize_method_ptr do_quantize = fdct->float_quantize; michael@0: workspace = fdct->float_workspace; michael@0: michael@0: sample_data += start_row; /* fold in the vertical offset once */ michael@0: michael@0: for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { michael@0: /* Load data into workspace, applying unsigned->signed conversion */ michael@0: (*do_convsamp) (sample_data, start_col, workspace); michael@0: michael@0: /* Perform the DCT */ michael@0: (*do_dct) (workspace); michael@0: michael@0: /* Quantize/descale the coefficients, and store into coef_blocks[] */ michael@0: (*do_quantize) (coef_blocks[bi], divisors, workspace); michael@0: } michael@0: } michael@0: michael@0: #endif /* DCT_FLOAT_SUPPORTED */ michael@0: michael@0: michael@0: /* michael@0: * Initialize FDCT manager. michael@0: */ michael@0: michael@0: GLOBAL(void) michael@0: jinit_forward_dct (j_compress_ptr cinfo) michael@0: { michael@0: my_fdct_ptr fdct; michael@0: int i; michael@0: michael@0: fdct = (my_fdct_ptr) michael@0: (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, michael@0: SIZEOF(my_fdct_controller)); michael@0: cinfo->fdct = (struct jpeg_forward_dct *) fdct; michael@0: fdct->pub.start_pass = start_pass_fdctmgr; michael@0: michael@0: /* First determine the DCT... */ michael@0: switch (cinfo->dct_method) { michael@0: #ifdef DCT_ISLOW_SUPPORTED michael@0: case JDCT_ISLOW: michael@0: fdct->pub.forward_DCT = forward_DCT; michael@0: if (jsimd_can_fdct_islow()) michael@0: fdct->dct = jsimd_fdct_islow; michael@0: else michael@0: fdct->dct = jpeg_fdct_islow; michael@0: break; michael@0: #endif michael@0: #ifdef DCT_IFAST_SUPPORTED michael@0: case JDCT_IFAST: michael@0: fdct->pub.forward_DCT = forward_DCT; michael@0: if (jsimd_can_fdct_ifast()) michael@0: fdct->dct = jsimd_fdct_ifast; michael@0: else michael@0: fdct->dct = jpeg_fdct_ifast; michael@0: break; michael@0: #endif michael@0: #ifdef DCT_FLOAT_SUPPORTED michael@0: case JDCT_FLOAT: michael@0: fdct->pub.forward_DCT = forward_DCT_float; michael@0: if (jsimd_can_fdct_float()) michael@0: fdct->float_dct = jsimd_fdct_float; michael@0: else michael@0: fdct->float_dct = jpeg_fdct_float; michael@0: break; michael@0: #endif michael@0: default: michael@0: ERREXIT(cinfo, JERR_NOT_COMPILED); michael@0: break; michael@0: } michael@0: michael@0: /* ...then the supporting stages. */ michael@0: switch (cinfo->dct_method) { michael@0: #ifdef DCT_ISLOW_SUPPORTED michael@0: case JDCT_ISLOW: michael@0: #endif michael@0: #ifdef DCT_IFAST_SUPPORTED michael@0: case JDCT_IFAST: michael@0: #endif michael@0: #if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED) michael@0: if (jsimd_can_convsamp()) michael@0: fdct->convsamp = jsimd_convsamp; michael@0: else michael@0: fdct->convsamp = convsamp; michael@0: if (jsimd_can_quantize()) michael@0: fdct->quantize = jsimd_quantize; michael@0: else michael@0: fdct->quantize = quantize; michael@0: break; michael@0: #endif michael@0: #ifdef DCT_FLOAT_SUPPORTED michael@0: case JDCT_FLOAT: michael@0: if (jsimd_can_convsamp_float()) michael@0: fdct->float_convsamp = jsimd_convsamp_float; michael@0: else michael@0: fdct->float_convsamp = convsamp_float; michael@0: if (jsimd_can_quantize_float()) michael@0: fdct->float_quantize = jsimd_quantize_float; michael@0: else michael@0: fdct->float_quantize = quantize_float; michael@0: break; michael@0: #endif michael@0: default: michael@0: ERREXIT(cinfo, JERR_NOT_COMPILED); michael@0: break; michael@0: } michael@0: michael@0: /* Allocate workspace memory */ michael@0: #ifdef DCT_FLOAT_SUPPORTED michael@0: if (cinfo->dct_method == JDCT_FLOAT) michael@0: fdct->float_workspace = (FAST_FLOAT *) michael@0: (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, michael@0: SIZEOF(FAST_FLOAT) * DCTSIZE2); michael@0: else michael@0: #endif michael@0: fdct->workspace = (DCTELEM *) michael@0: (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, michael@0: SIZEOF(DCTELEM) * DCTSIZE2); michael@0: michael@0: /* Mark divisor tables unallocated */ michael@0: for (i = 0; i < NUM_QUANT_TBLS; i++) { michael@0: fdct->divisors[i] = NULL; michael@0: #ifdef DCT_FLOAT_SUPPORTED michael@0: fdct->float_divisors[i] = NULL; michael@0: #endif michael@0: } michael@0: }