1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libjpeg/jcdctmgr.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,643 @@ 1.4 +/* 1.5 + * jcdctmgr.c 1.6 + * 1.7 + * This file was part of the Independent JPEG Group's software: 1.8 + * Copyright (C) 1994-1996, Thomas G. Lane. 1.9 + * libjpeg-turbo Modifications: 1.10 + * Copyright (C) 1999-2006, MIYASAKA Masaru. 1.11 + * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB 1.12 + * Copyright (C) 2011 D. R. Commander 1.13 + * For conditions of distribution and use, see the accompanying README file. 1.14 + * 1.15 + * This file contains the forward-DCT management logic. 1.16 + * This code selects a particular DCT implementation to be used, 1.17 + * and it performs related housekeeping chores including coefficient 1.18 + * quantization. 1.19 + */ 1.20 + 1.21 +#define JPEG_INTERNALS 1.22 +#include "jinclude.h" 1.23 +#include "jpeglib.h" 1.24 +#include "jdct.h" /* Private declarations for DCT subsystem */ 1.25 +#include "jsimddct.h" 1.26 + 1.27 + 1.28 +/* Private subobject for this module */ 1.29 + 1.30 +typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data)); 1.31 +typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data)); 1.32 + 1.33 +typedef JMETHOD(void, convsamp_method_ptr, 1.34 + (JSAMPARRAY sample_data, JDIMENSION start_col, 1.35 + DCTELEM * workspace)); 1.36 +typedef JMETHOD(void, float_convsamp_method_ptr, 1.37 + (JSAMPARRAY sample_data, JDIMENSION start_col, 1.38 + FAST_FLOAT *workspace)); 1.39 + 1.40 +typedef JMETHOD(void, quantize_method_ptr, 1.41 + (JCOEFPTR coef_block, DCTELEM * divisors, 1.42 + DCTELEM * workspace)); 1.43 +typedef JMETHOD(void, float_quantize_method_ptr, 1.44 + (JCOEFPTR coef_block, FAST_FLOAT * divisors, 1.45 + FAST_FLOAT * workspace)); 1.46 + 1.47 +METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *); 1.48 + 1.49 +typedef struct { 1.50 + struct jpeg_forward_dct pub; /* public fields */ 1.51 + 1.52 + /* Pointer to the DCT routine actually in use 
*/ 1.53 + forward_DCT_method_ptr dct; 1.54 + convsamp_method_ptr convsamp; 1.55 + quantize_method_ptr quantize; 1.56 + 1.57 + /* The actual post-DCT divisors --- not identical to the quant table 1.58 + * entries, because of scaling (especially for an unnormalized DCT). 1.59 + * Each table is given in normal array order. 1.60 + */ 1.61 + DCTELEM * divisors[NUM_QUANT_TBLS]; 1.62 + 1.63 + /* work area for FDCT subroutine */ 1.64 + DCTELEM * workspace; 1.65 + 1.66 +#ifdef DCT_FLOAT_SUPPORTED 1.67 + /* Same as above for the floating-point case. */ 1.68 + float_DCT_method_ptr float_dct; 1.69 + float_convsamp_method_ptr float_convsamp; 1.70 + float_quantize_method_ptr float_quantize; 1.71 + FAST_FLOAT * float_divisors[NUM_QUANT_TBLS]; 1.72 + FAST_FLOAT * float_workspace; 1.73 +#endif 1.74 +} my_fdct_controller; 1.75 + 1.76 +typedef my_fdct_controller * my_fdct_ptr; 1.77 + 1.78 + 1.79 +/* 1.80 + * Find the highest bit in an integer through binary search. 1.81 + */ 1.82 +LOCAL(int) 1.83 +flss (UINT16 val) 1.84 +{ 1.85 + int bit; 1.86 + 1.87 + bit = 16; 1.88 + 1.89 + if (!val) 1.90 + return 0; 1.91 + 1.92 + if (!(val & 0xff00)) { 1.93 + bit -= 8; 1.94 + val <<= 8; 1.95 + } 1.96 + if (!(val & 0xf000)) { 1.97 + bit -= 4; 1.98 + val <<= 4; 1.99 + } 1.100 + if (!(val & 0xc000)) { 1.101 + bit -= 2; 1.102 + val <<= 2; 1.103 + } 1.104 + if (!(val & 0x8000)) { 1.105 + bit -= 1; 1.106 + val <<= 1; 1.107 + } 1.108 + 1.109 + return bit; 1.110 +} 1.111 + 1.112 +/* 1.113 + * Compute values to do a division using reciprocal. 1.114 + * 1.115 + * This implementation is based on an algorithm described in 1.116 + * "How to optimize for the Pentium family of microprocessors" 1.117 + * (http://www.agner.org/assem/). 1.118 + * More information about the basic algorithm can be found in 1.119 + * the paper "Integer Division Using Reciprocals" by Robert Alverson. 1.120 + * 1.121 + * The basic idea is to replace x/d by x * d^-1. 
 * In order to store d^-1 with enough precision we shift it left a few
 * places.  It turns out that this algorithm gives just enough precision,
 * and also fits into DCTELEM:
 *
 *   b = (the number of significant bits in divisor) - 1
 *   r = (word size) + b
 *   f = 2^r / divisor
 *
 * f will not be an integer for most cases, so we need to compensate
 * for the rounding error introduced:
 *
 * no fractional part:
 *
 *   result = input >> r
 *
 * fractional part of f < 0.5:
 *
 *   round f down to nearest integer
 *   result = ((input + 1) * f) >> r
 *
 * fractional part of f > 0.5:
 *
 *   round f up to nearest integer
 *   result = (input * f) >> r
 *
 * This is the original algorithm that gives truncated results.  But we
 * want properly rounded results, so we replace "input" with
 * "input + divisor/2".
 *
 * In order to allow SIMD implementations we also tweak the values to
 * allow the same calculation to be made at all times:
 *
 *   dctbl[0] = f rounded to nearest integer
 *   dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
 *   dctbl[2] = 1 << ((word size) * 2 - r)
 *   dctbl[3] = r - (word size)
 *
 * dctbl[2] is for stupid instruction sets where the shift operation
 * isn't member wise (e.g. MMX).
 *
 * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
 * is that most SIMD implementations have a "multiply and store top
 * half" operation.
 *
 * Lastly, we store each of the values in their own table instead
 * of in a consecutive manner, yet again in order to allow SIMD
 * routines.
1.169 + */ 1.170 +LOCAL(int) 1.171 +compute_reciprocal (UINT16 divisor, DCTELEM * dtbl) 1.172 +{ 1.173 + UDCTELEM2 fq, fr; 1.174 + UDCTELEM c; 1.175 + int b, r; 1.176 + 1.177 + b = flss(divisor) - 1; 1.178 + r = sizeof(DCTELEM) * 8 + b; 1.179 + 1.180 + fq = ((UDCTELEM2)1 << r) / divisor; 1.181 + fr = ((UDCTELEM2)1 << r) % divisor; 1.182 + 1.183 + c = divisor / 2; /* for rounding */ 1.184 + 1.185 + if (fr == 0) { /* divisor is power of two */ 1.186 + /* fq will be one bit too large to fit in DCTELEM, so adjust */ 1.187 + fq >>= 1; 1.188 + r--; 1.189 + } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */ 1.190 + c++; 1.191 + } else { /* fractional part is > 0.5 */ 1.192 + fq++; 1.193 + } 1.194 + 1.195 + dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */ 1.196 + dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */ 1.197 + dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */ 1.198 + dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */ 1.199 + 1.200 + if(r <= 16) return 0; 1.201 + else return 1; 1.202 +} 1.203 + 1.204 +/* 1.205 + * Initialize for a processing pass. 1.206 + * Verify that all referenced Q-tables are present, and set up 1.207 + * the divisor table for each one. 1.208 + * In the current implementation, DCT of all components is done during 1.209 + * the first pass, even if only some components will be output in the 1.210 + * first scan. Hence all components should be examined here. 
1.211 + */ 1.212 + 1.213 +METHODDEF(void) 1.214 +start_pass_fdctmgr (j_compress_ptr cinfo) 1.215 +{ 1.216 + my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; 1.217 + int ci, qtblno, i; 1.218 + jpeg_component_info *compptr; 1.219 + JQUANT_TBL * qtbl; 1.220 + DCTELEM * dtbl; 1.221 + 1.222 + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; 1.223 + ci++, compptr++) { 1.224 + qtblno = compptr->quant_tbl_no; 1.225 + /* Make sure specified quantization table is present */ 1.226 + if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS || 1.227 + cinfo->quant_tbl_ptrs[qtblno] == NULL) 1.228 + ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno); 1.229 + qtbl = cinfo->quant_tbl_ptrs[qtblno]; 1.230 + /* Compute divisors for this quant table */ 1.231 + /* We may do this more than once for same table, but it's not a big deal */ 1.232 + switch (cinfo->dct_method) { 1.233 +#ifdef DCT_ISLOW_SUPPORTED 1.234 + case JDCT_ISLOW: 1.235 + /* For LL&M IDCT method, divisors are equal to raw quantization 1.236 + * coefficients multiplied by 8 (to counteract scaling). 1.237 + */ 1.238 + if (fdct->divisors[qtblno] == NULL) { 1.239 + fdct->divisors[qtblno] = (DCTELEM *) 1.240 + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 1.241 + (DCTSIZE2 * 4) * SIZEOF(DCTELEM)); 1.242 + } 1.243 + dtbl = fdct->divisors[qtblno]; 1.244 + for (i = 0; i < DCTSIZE2; i++) { 1.245 + if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) 1.246 + && fdct->quantize == jsimd_quantize) 1.247 + fdct->quantize = quantize; 1.248 + } 1.249 + break; 1.250 +#endif 1.251 +#ifdef DCT_IFAST_SUPPORTED 1.252 + case JDCT_IFAST: 1.253 + { 1.254 + /* For AA&N IDCT method, divisors are equal to quantization 1.255 + * coefficients scaled by scalefactor[row]*scalefactor[col], where 1.256 + * scalefactor[0] = 1 1.257 + * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 1.258 + * We apply a further scale factor of 8. 
1.259 + */ 1.260 +#define CONST_BITS 14 1.261 + static const INT16 aanscales[DCTSIZE2] = { 1.262 + /* precomputed values scaled up by 14 bits */ 1.263 + 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 1.264 + 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, 1.265 + 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, 1.266 + 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, 1.267 + 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 1.268 + 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, 1.269 + 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, 1.270 + 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 1.271 + }; 1.272 + SHIFT_TEMPS 1.273 + 1.274 + if (fdct->divisors[qtblno] == NULL) { 1.275 + fdct->divisors[qtblno] = (DCTELEM *) 1.276 + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 1.277 + (DCTSIZE2 * 4) * SIZEOF(DCTELEM)); 1.278 + } 1.279 + dtbl = fdct->divisors[qtblno]; 1.280 + for (i = 0; i < DCTSIZE2; i++) { 1.281 + if(!compute_reciprocal( 1.282 + DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i], 1.283 + (INT32) aanscales[i]), 1.284 + CONST_BITS-3), &dtbl[i]) 1.285 + && fdct->quantize == jsimd_quantize) 1.286 + fdct->quantize = quantize; 1.287 + } 1.288 + } 1.289 + break; 1.290 +#endif 1.291 +#ifdef DCT_FLOAT_SUPPORTED 1.292 + case JDCT_FLOAT: 1.293 + { 1.294 + /* For float AA&N IDCT method, divisors are equal to quantization 1.295 + * coefficients scaled by scalefactor[row]*scalefactor[col], where 1.296 + * scalefactor[0] = 1 1.297 + * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 1.298 + * We apply a further scale factor of 8. 1.299 + * What's actually stored is 1/divisor so that the inner loop can 1.300 + * use a multiplication rather than a division. 
1.301 + */ 1.302 + FAST_FLOAT * fdtbl; 1.303 + int row, col; 1.304 + static const double aanscalefactor[DCTSIZE] = { 1.305 + 1.0, 1.387039845, 1.306562965, 1.175875602, 1.306 + 1.0, 0.785694958, 0.541196100, 0.275899379 1.307 + }; 1.308 + 1.309 + if (fdct->float_divisors[qtblno] == NULL) { 1.310 + fdct->float_divisors[qtblno] = (FAST_FLOAT *) 1.311 + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 1.312 + DCTSIZE2 * SIZEOF(FAST_FLOAT)); 1.313 + } 1.314 + fdtbl = fdct->float_divisors[qtblno]; 1.315 + i = 0; 1.316 + for (row = 0; row < DCTSIZE; row++) { 1.317 + for (col = 0; col < DCTSIZE; col++) { 1.318 + fdtbl[i] = (FAST_FLOAT) 1.319 + (1.0 / (((double) qtbl->quantval[i] * 1.320 + aanscalefactor[row] * aanscalefactor[col] * 8.0))); 1.321 + i++; 1.322 + } 1.323 + } 1.324 + } 1.325 + break; 1.326 +#endif 1.327 + default: 1.328 + ERREXIT(cinfo, JERR_NOT_COMPILED); 1.329 + break; 1.330 + } 1.331 + } 1.332 +} 1.333 + 1.334 + 1.335 +/* 1.336 + * Load data into workspace, applying unsigned->signed conversion. 
1.337 + */ 1.338 + 1.339 +METHODDEF(void) 1.340 +convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace) 1.341 +{ 1.342 + register DCTELEM *workspaceptr; 1.343 + register JSAMPROW elemptr; 1.344 + register int elemr; 1.345 + 1.346 + workspaceptr = workspace; 1.347 + for (elemr = 0; elemr < DCTSIZE; elemr++) { 1.348 + elemptr = sample_data[elemr] + start_col; 1.349 + 1.350 +#if DCTSIZE == 8 /* unroll the inner loop */ 1.351 + *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 1.352 + *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 1.353 + *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 1.354 + *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 1.355 + *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 1.356 + *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 1.357 + *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 1.358 + *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 1.359 +#else 1.360 + { 1.361 + register int elemc; 1.362 + for (elemc = DCTSIZE; elemc > 0; elemc--) 1.363 + *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 1.364 + } 1.365 +#endif 1.366 + } 1.367 +} 1.368 + 1.369 + 1.370 +/* 1.371 + * Quantize/descale the coefficients, and store into coef_blocks[]. 
1.372 + */ 1.373 + 1.374 +METHODDEF(void) 1.375 +quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace) 1.376 +{ 1.377 + int i; 1.378 + DCTELEM temp; 1.379 + UDCTELEM recip, corr, shift; 1.380 + UDCTELEM2 product; 1.381 + JCOEFPTR output_ptr = coef_block; 1.382 + 1.383 + for (i = 0; i < DCTSIZE2; i++) { 1.384 + temp = workspace[i]; 1.385 + recip = divisors[i + DCTSIZE2 * 0]; 1.386 + corr = divisors[i + DCTSIZE2 * 1]; 1.387 + shift = divisors[i + DCTSIZE2 * 3]; 1.388 + 1.389 + if (temp < 0) { 1.390 + temp = -temp; 1.391 + product = (UDCTELEM2)(temp + corr) * recip; 1.392 + product >>= shift + sizeof(DCTELEM)*8; 1.393 + temp = product; 1.394 + temp = -temp; 1.395 + } else { 1.396 + product = (UDCTELEM2)(temp + corr) * recip; 1.397 + product >>= shift + sizeof(DCTELEM)*8; 1.398 + temp = product; 1.399 + } 1.400 + 1.401 + output_ptr[i] = (JCOEF) temp; 1.402 + } 1.403 +} 1.404 + 1.405 + 1.406 +/* 1.407 + * Perform forward DCT on one or more blocks of a component. 1.408 + * 1.409 + * The input samples are taken from the sample_data[] array starting at 1.410 + * position start_row/start_col, and moving to the right for any additional 1.411 + * blocks. The quantized coefficients are returned in coef_blocks[]. 1.412 + */ 1.413 + 1.414 +METHODDEF(void) 1.415 +forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr, 1.416 + JSAMPARRAY sample_data, JBLOCKROW coef_blocks, 1.417 + JDIMENSION start_row, JDIMENSION start_col, 1.418 + JDIMENSION num_blocks) 1.419 +/* This version is used for integer DCT implementations. */ 1.420 +{ 1.421 + /* This routine is heavily used, so it's worth coding it tightly. 
*/ 1.422 + my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; 1.423 + DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no]; 1.424 + DCTELEM * workspace; 1.425 + JDIMENSION bi; 1.426 + 1.427 + /* Make sure the compiler doesn't look up these every pass */ 1.428 + forward_DCT_method_ptr do_dct = fdct->dct; 1.429 + convsamp_method_ptr do_convsamp = fdct->convsamp; 1.430 + quantize_method_ptr do_quantize = fdct->quantize; 1.431 + workspace = fdct->workspace; 1.432 + 1.433 + sample_data += start_row; /* fold in the vertical offset once */ 1.434 + 1.435 + for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { 1.436 + /* Load data into workspace, applying unsigned->signed conversion */ 1.437 + (*do_convsamp) (sample_data, start_col, workspace); 1.438 + 1.439 + /* Perform the DCT */ 1.440 + (*do_dct) (workspace); 1.441 + 1.442 + /* Quantize/descale the coefficients, and store into coef_blocks[] */ 1.443 + (*do_quantize) (coef_blocks[bi], divisors, workspace); 1.444 + } 1.445 +} 1.446 + 1.447 + 1.448 +#ifdef DCT_FLOAT_SUPPORTED 1.449 + 1.450 + 1.451 +METHODDEF(void) 1.452 +convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace) 1.453 +{ 1.454 + register FAST_FLOAT *workspaceptr; 1.455 + register JSAMPROW elemptr; 1.456 + register int elemr; 1.457 + 1.458 + workspaceptr = workspace; 1.459 + for (elemr = 0; elemr < DCTSIZE; elemr++) { 1.460 + elemptr = sample_data[elemr] + start_col; 1.461 +#if DCTSIZE == 8 /* unroll the inner loop */ 1.462 + *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 1.463 + *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 1.464 + *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 1.465 + *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 1.466 + *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 1.467 + *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 1.468 + *workspaceptr++ = 
(FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 1.469 + *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 1.470 +#else 1.471 + { 1.472 + register int elemc; 1.473 + for (elemc = DCTSIZE; elemc > 0; elemc--) 1.474 + *workspaceptr++ = (FAST_FLOAT) 1.475 + (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 1.476 + } 1.477 +#endif 1.478 + } 1.479 +} 1.480 + 1.481 + 1.482 +METHODDEF(void) 1.483 +quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace) 1.484 +{ 1.485 + register FAST_FLOAT temp; 1.486 + register int i; 1.487 + register JCOEFPTR output_ptr = coef_block; 1.488 + 1.489 + for (i = 0; i < DCTSIZE2; i++) { 1.490 + /* Apply the quantization and scaling factor */ 1.491 + temp = workspace[i] * divisors[i]; 1.492 + 1.493 + /* Round to nearest integer. 1.494 + * Since C does not specify the direction of rounding for negative 1.495 + * quotients, we have to force the dividend positive for portability. 1.496 + * The maximum coefficient size is +-16K (for 12-bit data), so this 1.497 + * code should work for either 16-bit or 32-bit ints. 1.498 + */ 1.499 + output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384); 1.500 + } 1.501 +} 1.502 + 1.503 + 1.504 +METHODDEF(void) 1.505 +forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr, 1.506 + JSAMPARRAY sample_data, JBLOCKROW coef_blocks, 1.507 + JDIMENSION start_row, JDIMENSION start_col, 1.508 + JDIMENSION num_blocks) 1.509 +/* This version is used for floating-point DCT implementations. */ 1.510 +{ 1.511 + /* This routine is heavily used, so it's worth coding it tightly. 
*/ 1.512 + my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; 1.513 + FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no]; 1.514 + FAST_FLOAT * workspace; 1.515 + JDIMENSION bi; 1.516 + 1.517 + 1.518 + /* Make sure the compiler doesn't look up these every pass */ 1.519 + float_DCT_method_ptr do_dct = fdct->float_dct; 1.520 + float_convsamp_method_ptr do_convsamp = fdct->float_convsamp; 1.521 + float_quantize_method_ptr do_quantize = fdct->float_quantize; 1.522 + workspace = fdct->float_workspace; 1.523 + 1.524 + sample_data += start_row; /* fold in the vertical offset once */ 1.525 + 1.526 + for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { 1.527 + /* Load data into workspace, applying unsigned->signed conversion */ 1.528 + (*do_convsamp) (sample_data, start_col, workspace); 1.529 + 1.530 + /* Perform the DCT */ 1.531 + (*do_dct) (workspace); 1.532 + 1.533 + /* Quantize/descale the coefficients, and store into coef_blocks[] */ 1.534 + (*do_quantize) (coef_blocks[bi], divisors, workspace); 1.535 + } 1.536 +} 1.537 + 1.538 +#endif /* DCT_FLOAT_SUPPORTED */ 1.539 + 1.540 + 1.541 +/* 1.542 + * Initialize FDCT manager. 1.543 + */ 1.544 + 1.545 +GLOBAL(void) 1.546 +jinit_forward_dct (j_compress_ptr cinfo) 1.547 +{ 1.548 + my_fdct_ptr fdct; 1.549 + int i; 1.550 + 1.551 + fdct = (my_fdct_ptr) 1.552 + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 1.553 + SIZEOF(my_fdct_controller)); 1.554 + cinfo->fdct = (struct jpeg_forward_dct *) fdct; 1.555 + fdct->pub.start_pass = start_pass_fdctmgr; 1.556 + 1.557 + /* First determine the DCT... 
*/ 1.558 + switch (cinfo->dct_method) { 1.559 +#ifdef DCT_ISLOW_SUPPORTED 1.560 + case JDCT_ISLOW: 1.561 + fdct->pub.forward_DCT = forward_DCT; 1.562 + if (jsimd_can_fdct_islow()) 1.563 + fdct->dct = jsimd_fdct_islow; 1.564 + else 1.565 + fdct->dct = jpeg_fdct_islow; 1.566 + break; 1.567 +#endif 1.568 +#ifdef DCT_IFAST_SUPPORTED 1.569 + case JDCT_IFAST: 1.570 + fdct->pub.forward_DCT = forward_DCT; 1.571 + if (jsimd_can_fdct_ifast()) 1.572 + fdct->dct = jsimd_fdct_ifast; 1.573 + else 1.574 + fdct->dct = jpeg_fdct_ifast; 1.575 + break; 1.576 +#endif 1.577 +#ifdef DCT_FLOAT_SUPPORTED 1.578 + case JDCT_FLOAT: 1.579 + fdct->pub.forward_DCT = forward_DCT_float; 1.580 + if (jsimd_can_fdct_float()) 1.581 + fdct->float_dct = jsimd_fdct_float; 1.582 + else 1.583 + fdct->float_dct = jpeg_fdct_float; 1.584 + break; 1.585 +#endif 1.586 + default: 1.587 + ERREXIT(cinfo, JERR_NOT_COMPILED); 1.588 + break; 1.589 + } 1.590 + 1.591 + /* ...then the supporting stages. */ 1.592 + switch (cinfo->dct_method) { 1.593 +#ifdef DCT_ISLOW_SUPPORTED 1.594 + case JDCT_ISLOW: 1.595 +#endif 1.596 +#ifdef DCT_IFAST_SUPPORTED 1.597 + case JDCT_IFAST: 1.598 +#endif 1.599 +#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED) 1.600 + if (jsimd_can_convsamp()) 1.601 + fdct->convsamp = jsimd_convsamp; 1.602 + else 1.603 + fdct->convsamp = convsamp; 1.604 + if (jsimd_can_quantize()) 1.605 + fdct->quantize = jsimd_quantize; 1.606 + else 1.607 + fdct->quantize = quantize; 1.608 + break; 1.609 +#endif 1.610 +#ifdef DCT_FLOAT_SUPPORTED 1.611 + case JDCT_FLOAT: 1.612 + if (jsimd_can_convsamp_float()) 1.613 + fdct->float_convsamp = jsimd_convsamp_float; 1.614 + else 1.615 + fdct->float_convsamp = convsamp_float; 1.616 + if (jsimd_can_quantize_float()) 1.617 + fdct->float_quantize = jsimd_quantize_float; 1.618 + else 1.619 + fdct->float_quantize = quantize_float; 1.620 + break; 1.621 +#endif 1.622 + default: 1.623 + ERREXIT(cinfo, JERR_NOT_COMPILED); 1.624 + break; 1.625 + } 1.626 + 1.627 + /* 
Allocate workspace memory */ 1.628 +#ifdef DCT_FLOAT_SUPPORTED 1.629 + if (cinfo->dct_method == JDCT_FLOAT) 1.630 + fdct->float_workspace = (FAST_FLOAT *) 1.631 + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 1.632 + SIZEOF(FAST_FLOAT) * DCTSIZE2); 1.633 + else 1.634 +#endif 1.635 + fdct->workspace = (DCTELEM *) 1.636 + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 1.637 + SIZEOF(DCTELEM) * DCTSIZE2); 1.638 + 1.639 + /* Mark divisor tables unallocated */ 1.640 + for (i = 0; i < NUM_QUANT_TBLS; i++) { 1.641 + fdct->divisors[i] = NULL; 1.642 +#ifdef DCT_FLOAT_SUPPORTED 1.643 + fdct->float_divisors[i] = NULL; 1.644 +#endif 1.645 + } 1.646 +}