1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/gfx/qcms/transform_util.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,514 @@ 1.4 +#define _ISOC99_SOURCE /* for INFINITY */ 1.5 + 1.6 +#include <math.h> 1.7 +#include <assert.h> 1.8 +#include <string.h> //memcpy 1.9 +#include "qcmsint.h" 1.10 +#include "transform_util.h" 1.11 +#include "matrix.h" 1.12 + 1.13 +#if !defined(INFINITY) 1.14 +#define INFINITY HUGE_VAL 1.15 +#endif 1.16 + 1.17 +#define PARAMETRIC_CURVE_TYPE 0x70617261 //'para' 1.18 + 1.19 +/* value must be a value between 0 and 1 */ 1.20 +//XXX: is the above a good restriction to have? 1.21 +// the output range of this functions is 0..1 1.22 +float lut_interp_linear(double input_value, uint16_t *table, int length) 1.23 +{ 1.24 + int upper, lower; 1.25 + float value; 1.26 + input_value = input_value * (length - 1); // scale to length of the array 1.27 + upper = ceil(input_value); 1.28 + lower = floor(input_value); 1.29 + //XXX: can we be more performant here? 1.30 + value = table[upper]*(1. - (upper - input_value)) + table[lower]*(upper - input_value); 1.31 + /* scale the value */ 1.32 + return value * (1.f/65535.f); 1.33 +} 1.34 + 1.35 +/* same as above but takes and returns a uint16_t value representing a range from 0..1 */ 1.36 +uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length) 1.37 +{ 1.38 + /* Start scaling input_value to the length of the array: 65535*(length-1). 1.39 + * We'll divide out the 65535 next */ 1.40 + uint32_t value = (input_value * (length - 1)); 1.41 + uint32_t upper = (value + 65534) / 65535; /* equivalent to ceil(value/65535) */ 1.42 + uint32_t lower = value / 65535; /* equivalent to floor(value/65535) */ 1.43 + /* interp is the distance from upper to value scaled to 0..65535 */ 1.44 + uint32_t interp = value % 65535; 1.45 + 1.46 + value = (table[upper]*(interp) + table[lower]*(65535 - interp))/65535; // 0..65535*65535 1.47 + 1.48 + return value; 1.49 +} 1.50 + 1.51 +/* same as above but takes an input_value from 0..PRECACHE_OUTPUT_MAX 1.52 + * and returns a uint8_t value representing a range from 0..1 */ 1.53 +static 1.54 +uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table, int length) 1.55 +{ 1.56 + /* Start scaling input_value to the length of the array: PRECACHE_OUTPUT_MAX*(length-1). 1.57 + * We'll divide out the PRECACHE_OUTPUT_MAX next */ 1.58 + uint32_t value = (input_value * (length - 1)); 1.59 + 1.60 + /* equivalent to ceil(value/PRECACHE_OUTPUT_MAX) */ 1.61 + uint32_t upper = (value + PRECACHE_OUTPUT_MAX-1) / PRECACHE_OUTPUT_MAX; 1.62 + /* equivalent to floor(value/PRECACHE_OUTPUT_MAX) */ 1.63 + uint32_t lower = value / PRECACHE_OUTPUT_MAX; 1.64 + /* interp is the distance from upper to value scaled to 0..PRECACHE_OUTPUT_MAX */ 1.65 + uint32_t interp = value % PRECACHE_OUTPUT_MAX; 1.66 + 1.67 + /* the table values range from 0..65535 */ 1.68 + value = (table[upper]*(interp) + table[lower]*(PRECACHE_OUTPUT_MAX - interp)); // 0..(65535*PRECACHE_OUTPUT_MAX) 1.69 + 1.70 + /* round and scale */ 1.71 + value += (PRECACHE_OUTPUT_MAX*65535/255)/2; 1.72 + value /= (PRECACHE_OUTPUT_MAX*65535/255); // scale to 0..255 1.73 + return value; 1.74 +} 1.75 + 1.76 +/* value must be a value between 0 and 1 */ 1.77 +//XXX: is the above a good restriction to have? 1.78 +float lut_interp_linear_float(float value, float *table, int length) 1.79 +{ 1.80 + int upper, lower; 1.81 + value = value * (length - 1); 1.82 + upper = ceilf(value); 1.83 + lower = floorf(value); 1.84 + //XXX: can we be more performant here? 1.85 + value = table[upper]*(1. - (upper - value)) + table[lower]*(upper - value); 1.86 + /* scale the value */ 1.87 + return value; 1.88 +} 1.89 + 1.90 +#if 0 1.91 +/* if we use a different representation i.e. one that goes from 0 to 0x1000 we can be more efficient 1.92 + * because we can avoid the divisions and use a shifting instead */ 1.93 +/* same as above but takes and returns a uint16_t value representing a range from 0..1 */ 1.94 +uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length) 1.95 +{ 1.96 + uint32_t value = (input_value * (length - 1)); 1.97 + uint32_t upper = (value + 4095) / 4096; /* equivalent to ceil(value/4096) */ 1.98 + uint32_t lower = value / 4096; /* equivalent to floor(value/4096) */ 1.99 + uint32_t interp = value % 4096; 1.100 + 1.101 + value = (table[upper]*(interp) + table[lower]*(4096 - interp))/4096; // 0..4096*4096 1.102 + 1.103 + return value; 1.104 +} 1.105 +#endif 1.106 + 1.107 +void compute_curve_gamma_table_type1(float gamma_table[256], uint16_t gamma) 1.108 +{ 1.109 + unsigned int i; 1.110 + float gamma_float = u8Fixed8Number_to_float(gamma); 1.111 + for (i = 0; i < 256; i++) { 1.112 + // 0..1^(0..255 + 255/256) will always be between 0 and 1 1.113 + gamma_table[i] = pow(i/255., gamma_float); 1.114 + } 1.115 +} 1.116 + 1.117 +void compute_curve_gamma_table_type2(float gamma_table[256], uint16_t *table, int length) 1.118 +{ 1.119 + unsigned int i; 1.120 + for (i = 0; i < 256; i++) { 1.121 + gamma_table[i] = lut_interp_linear(i/255., table, length); 1.122 + } 1.123 +} 1.124 + 1.125 +void compute_curve_gamma_table_type_parametric(float gamma_table[256], float parameter[7], int count) 1.126 +{ 1.127 + size_t X; 1.128 + float interval; 1.129 + float a, b, c, e, f; 1.130 + float y = parameter[0]; 1.131 + if (count == 0) { 1.132 + a = 1; 1.133 + b = 0; 1.134 + c = 0; 1.135 + e = 0; 1.136 + f = 0; 1.137 + interval = -INFINITY; 1.138 + } else if(count == 1) { 1.139 + a = parameter[1]; 1.140 + b = parameter[2]; 1.141 + c = 0; 1.142 + e = 0; 1.143 + f = 0; 1.144 + interval = -1 * parameter[2] / parameter[1]; 1.145 + } else if(count == 2) { 1.146 + a = parameter[1]; 1.147 + b = parameter[2]; 1.148 + c = 0; 1.149 + e = parameter[3]; 1.150 + f = parameter[3]; 1.151 + interval = -1 * parameter[2] / parameter[1]; 1.152 + } else if(count == 3) { 1.153 + a = parameter[1]; 1.154 + b = parameter[2]; 1.155 + c = parameter[3]; 1.156 + e = -c; 1.157 + f = 0; 1.158 + interval = parameter[4]; 1.159 + } else if(count == 4) { 1.160 + a = parameter[1]; 1.161 + b = parameter[2]; 1.162 + c = parameter[3]; 1.163 + e = parameter[5] - c; 1.164 + f = parameter[6]; 1.165 + interval = parameter[4]; 1.166 + } else { 1.167 + assert(0 && "invalid parametric function type."); 1.168 + a = 1; 1.169 + b = 0; 1.170 + c = 0; 1.171 + e = 0; 1.172 + f = 0; 1.173 + interval = -INFINITY; 1.174 + } 1.175 + for (X = 0; X < 256; X++) { 1.176 + if (X >= interval) { 1.177 + // XXX The equations are not exactly as definied in the spec but are 1.178 + // algebraic equivilent. 1.179 + // TODO Should division by 255 be for the whole expression. 1.180 + gamma_table[X] = clamp_float(pow(a * X / 255. + b, y) + c + e); 1.181 + } else { 1.182 + gamma_table[X] = clamp_float(c * X / 255. + f); 1.183 + } 1.184 + } 1.185 +} 1.186 + 1.187 +void compute_curve_gamma_table_type0(float gamma_table[256]) 1.188 +{ 1.189 + unsigned int i; 1.190 + for (i = 0; i < 256; i++) { 1.191 + gamma_table[i] = i/255.; 1.192 + } 1.193 +} 1.194 + 1.195 +float *build_input_gamma_table(struct curveType *TRC) 1.196 +{ 1.197 + float *gamma_table; 1.198 + 1.199 + if (!TRC) return NULL; 1.200 + gamma_table = malloc(sizeof(float)*256); 1.201 + if (gamma_table) { 1.202 + if (TRC->type == PARAMETRIC_CURVE_TYPE) { 1.203 + compute_curve_gamma_table_type_parametric(gamma_table, TRC->parameter, TRC->count); 1.204 + } else { 1.205 + if (TRC->count == 0) { 1.206 + compute_curve_gamma_table_type0(gamma_table); 1.207 + } else if (TRC->count == 1) { 1.208 + compute_curve_gamma_table_type1(gamma_table, TRC->data[0]); 1.209 + } else { 1.210 + compute_curve_gamma_table_type2(gamma_table, TRC->data, TRC->count); 1.211 + } 1.212 + } 1.213 + } 1.214 + return gamma_table; 1.215 +} 1.216 + 1.217 +struct matrix build_colorant_matrix(qcms_profile *p) 1.218 +{ 1.219 + struct matrix result; 1.220 + result.m[0][0] = s15Fixed16Number_to_float(p->redColorant.X); 1.221 + result.m[0][1] = s15Fixed16Number_to_float(p->greenColorant.X); 1.222 + result.m[0][2] = s15Fixed16Number_to_float(p->blueColorant.X); 1.223 + result.m[1][0] = s15Fixed16Number_to_float(p->redColorant.Y); 1.224 + result.m[1][1] = s15Fixed16Number_to_float(p->greenColorant.Y); 1.225 + result.m[1][2] = s15Fixed16Number_to_float(p->blueColorant.Y); 1.226 + result.m[2][0] = s15Fixed16Number_to_float(p->redColorant.Z); 1.227 + result.m[2][1] = s15Fixed16Number_to_float(p->greenColorant.Z); 1.228 + result.m[2][2] = s15Fixed16Number_to_float(p->blueColorant.Z); 1.229 + result.invalid = false; 1.230 + return result; 1.231 +} 1.232 + 1.233 +/* The following code is copied nearly directly from lcms. 1.234 + * I think it could be much better. For example, Argyll seems to have better code in 1.235 + * icmTable_lookup_bwd and icmTable_setup_bwd. However, for now this is a quick way 1.236 + * to a working solution and allows for easy comparing with lcms. */ 1.237 +uint16_fract_t lut_inverse_interp16(uint16_t Value, uint16_t LutTable[], int length) 1.238 +{ 1.239 + int l = 1; 1.240 + int r = 0x10000; 1.241 + int x = 0, res; // 'int' Give spacing for negative values 1.242 + int NumZeroes, NumPoles; 1.243 + int cell0, cell1; 1.244 + double val2; 1.245 + double y0, y1, x0, x1; 1.246 + double a, b, f; 1.247 + 1.248 + // July/27 2001 - Expanded to handle degenerated curves with an arbitrary 1.249 + // number of elements containing 0 at the begining of the table (Zeroes) 1.250 + // and another arbitrary number of poles (FFFFh) at the end. 1.251 + // First the zero and pole extents are computed, then value is compared. 1.252 + 1.253 + NumZeroes = 0; 1.254 + while (LutTable[NumZeroes] == 0 && NumZeroes < length-1) 1.255 + NumZeroes++; 1.256 + 1.257 + // There are no zeros at the beginning and we are trying to find a zero, so 1.258 + // return anything. It seems zero would be the less destructive choice 1.259 + /* I'm not sure that this makes sense, but oh well... */ 1.260 + if (NumZeroes == 0 && Value == 0) 1.261 + return 0; 1.262 + 1.263 + NumPoles = 0; 1.264 + while (LutTable[length-1- NumPoles] == 0xFFFF && NumPoles < length-1) 1.265 + NumPoles++; 1.266 + 1.267 + // Does the curve belong to this case? 1.268 + if (NumZeroes > 1 || NumPoles > 1) 1.269 + { 1.270 + int a, b; 1.271 + 1.272 + // Identify if value fall downto 0 or FFFF zone 1.273 + if (Value == 0) return 0; 1.274 + // if (Value == 0xFFFF) return 0xFFFF; 1.275 + 1.276 + // else restrict to valid zone 1.277 + 1.278 + a = ((NumZeroes-1) * 0xFFFF) / (length-1); 1.279 + b = ((length-1 - NumPoles) * 0xFFFF) / (length-1); 1.280 + 1.281 + l = a - 1; 1.282 + r = b + 1; 1.283 + } 1.284 + 1.285 + 1.286 + // Seems not a degenerated case... apply binary search 1.287 + 1.288 + while (r > l) { 1.289 + 1.290 + x = (l + r) / 2; 1.291 + 1.292 + res = (int) lut_interp_linear16((uint16_fract_t) (x-1), LutTable, length); 1.293 + 1.294 + if (res == Value) { 1.295 + 1.296 + // Found exact match. 1.297 + 1.298 + return (uint16_fract_t) (x - 1); 1.299 + } 1.300 + 1.301 + if (res > Value) r = x - 1; 1.302 + else l = x + 1; 1.303 + } 1.304 + 1.305 + // Not found, should we interpolate? 1.306 + 1.307 + 1.308 + // Get surrounding nodes 1.309 + 1.310 + val2 = (length-1) * ((double) (x - 1) / 65535.0); 1.311 + 1.312 + cell0 = (int) floor(val2); 1.313 + cell1 = (int) ceil(val2); 1.314 + 1.315 + if (cell0 == cell1) return (uint16_fract_t) x; 1.316 + 1.317 + y0 = LutTable[cell0] ; 1.318 + x0 = (65535.0 * cell0) / (length-1); 1.319 + 1.320 + y1 = LutTable[cell1] ; 1.321 + x1 = (65535.0 * cell1) / (length-1); 1.322 + 1.323 + a = (y1 - y0) / (x1 - x0); 1.324 + b = y0 - a * x0; 1.325 + 1.326 + if (fabs(a) < 0.01) return (uint16_fract_t) x; 1.327 + 1.328 + f = ((Value - b) / a); 1.329 + 1.330 + if (f < 0.0) return (uint16_fract_t) 0; 1.331 + if (f >= 65535.0) return (uint16_fract_t) 0xFFFF; 1.332 + 1.333 + return (uint16_fract_t) floor(f + 0.5); 1.334 + 1.335 +} 1.336 + 1.337 +/* 1.338 + The number of entries needed to invert a lookup table should not 1.339 + necessarily be the same as the original number of entries. This is 1.340 + especially true of lookup tables that have a small number of entries. 1.341 + 1.342 + For example: 1.343 + Using a table like: 1.344 + {0, 3104, 14263, 34802, 65535} 1.345 + invert_lut will produce an inverse of: 1.346 + {3, 34459, 47529, 56801, 65535} 1.347 + which has an maximum error of about 9855 (pixel difference of ~38.346) 1.348 + 1.349 + For now, we punt the decision of output size to the caller. */ 1.350 +static uint16_t *invert_lut(uint16_t *table, int length, int out_length) 1.351 +{ 1.352 + int i; 1.353 + /* for now we invert the lut by creating a lut of size out_length 1.354 + * and attempting to lookup a value for each entry using lut_inverse_interp16 */ 1.355 + uint16_t *output = malloc(sizeof(uint16_t)*out_length); 1.356 + if (!output) 1.357 + return NULL; 1.358 + 1.359 + for (i = 0; i < out_length; i++) { 1.360 + double x = ((double) i * 65535.) / (double) (out_length - 1); 1.361 + uint16_fract_t input = floor(x + .5); 1.362 + output[i] = lut_inverse_interp16(input, table, length); 1.363 + } 1.364 + return output; 1.365 +} 1.366 + 1.367 +static void compute_precache_pow(uint8_t *output, float gamma) 1.368 +{ 1.369 + uint32_t v = 0; 1.370 + for (v = 0; v < PRECACHE_OUTPUT_SIZE; v++) { 1.371 + //XXX: don't do integer/float conversion... and round? 1.372 + output[v] = 255. * pow(v/(double)PRECACHE_OUTPUT_MAX, gamma); 1.373 + } 1.374 +} 1.375 + 1.376 +void compute_precache_lut(uint8_t *output, uint16_t *table, int length) 1.377 +{ 1.378 + uint32_t v = 0; 1.379 + for (v = 0; v < PRECACHE_OUTPUT_SIZE; v++) { 1.380 + output[v] = lut_interp_linear_precache_output(v, table, length); 1.381 + } 1.382 +} 1.383 + 1.384 +void compute_precache_linear(uint8_t *output) 1.385 +{ 1.386 + uint32_t v = 0; 1.387 + for (v = 0; v < PRECACHE_OUTPUT_SIZE; v++) { 1.388 + //XXX: round? 1.389 + output[v] = v / (PRECACHE_OUTPUT_SIZE/256); 1.390 + } 1.391 +} 1.392 + 1.393 +qcms_bool compute_precache(struct curveType *trc, uint8_t *output) 1.394 +{ 1.395 + 1.396 + if (trc->type == PARAMETRIC_CURVE_TYPE) { 1.397 + float gamma_table[256]; 1.398 + uint16_t gamma_table_uint[256]; 1.399 + uint16_t i; 1.400 + uint16_t *inverted; 1.401 + int inverted_size = 256; 1.402 + 1.403 + compute_curve_gamma_table_type_parametric(gamma_table, trc->parameter, trc->count); 1.404 + for(i = 0; i < 256; i++) { 1.405 + gamma_table_uint[i] = (uint16_t)(gamma_table[i] * 65535); 1.406 + } 1.407 + 1.408 + //XXX: the choice of a minimum of 256 here is not backed by any theory, 1.409 + // measurement or data, howeve r it is what lcms uses. 1.410 + // the maximum number we would need is 65535 because that's the 1.411 + // accuracy used for computing the pre cache table 1.412 + if (inverted_size < 256) 1.413 + inverted_size = 256; 1.414 + 1.415 + inverted = invert_lut(gamma_table_uint, 256, inverted_size); 1.416 + if (!inverted) 1.417 + return false; 1.418 + compute_precache_lut(output, inverted, inverted_size); 1.419 + free(inverted); 1.420 + } else { 1.421 + if (trc->count == 0) { 1.422 + compute_precache_linear(output); 1.423 + } else if (trc->count == 1) { 1.424 + compute_precache_pow(output, 1./u8Fixed8Number_to_float(trc->data[0])); 1.425 + } else { 1.426 + uint16_t *inverted; 1.427 + int inverted_size = trc->count; 1.428 + //XXX: the choice of a minimum of 256 here is not backed by any theory, 1.429 + // measurement or data, howeve r it is what lcms uses. 1.430 + // the maximum number we would need is 65535 because that's the 1.431 + // accuracy used for computing the pre cache table 1.432 + if (inverted_size < 256) 1.433 + inverted_size = 256; 1.434 + 1.435 + inverted = invert_lut(trc->data, trc->count, inverted_size); 1.436 + if (!inverted) 1.437 + return false; 1.438 + compute_precache_lut(output, inverted, inverted_size); 1.439 + free(inverted); 1.440 + } 1.441 + } 1.442 + return true; 1.443 +} 1.444 + 1.445 + 1.446 +static uint16_t *build_linear_table(int length) 1.447 +{ 1.448 + int i; 1.449 + uint16_t *output = malloc(sizeof(uint16_t)*length); 1.450 + if (!output) 1.451 + return NULL; 1.452 + 1.453 + for (i = 0; i < length; i++) { 1.454 + double x = ((double) i * 65535.) / (double) (length - 1); 1.455 + uint16_fract_t input = floor(x + .5); 1.456 + output[i] = input; 1.457 + } 1.458 + return output; 1.459 +} 1.460 + 1.461 +static uint16_t *build_pow_table(float gamma, int length) 1.462 +{ 1.463 + int i; 1.464 + uint16_t *output = malloc(sizeof(uint16_t)*length); 1.465 + if (!output) 1.466 + return NULL; 1.467 + 1.468 + for (i = 0; i < length; i++) { 1.469 + uint16_fract_t result; 1.470 + double x = ((double) i) / (double) (length - 1); 1.471 + x = pow(x, gamma); //XXX turn this conversion into a function 1.472 + result = floor(x*65535. + .5); 1.473 + output[i] = result; 1.474 + } 1.475 + return output; 1.476 +} 1.477 + 1.478 +void build_output_lut(struct curveType *trc, 1.479 + uint16_t **output_gamma_lut, size_t *output_gamma_lut_length) 1.480 +{ 1.481 + if (trc->type == PARAMETRIC_CURVE_TYPE) { 1.482 + float gamma_table[256]; 1.483 + uint16_t i; 1.484 + uint16_t *output = malloc(sizeof(uint16_t)*256); 1.485 + 1.486 + if (!output) { 1.487 + *output_gamma_lut = NULL; 1.488 + return; 1.489 + } 1.490 + 1.491 + compute_curve_gamma_table_type_parametric(gamma_table, trc->parameter, trc->count); 1.492 + *output_gamma_lut_length = 256; 1.493 + for(i = 0; i < 256; i++) { 1.494 + output[i] = (uint16_t)(gamma_table[i] * 65535); 1.495 + } 1.496 + *output_gamma_lut = output; 1.497 + } else { 1.498 + if (trc->count == 0) { 1.499 + *output_gamma_lut = build_linear_table(4096); 1.500 + *output_gamma_lut_length = 4096; 1.501 + } else if (trc->count == 1) { 1.502 + float gamma = 1./u8Fixed8Number_to_float(trc->data[0]); 1.503 + *output_gamma_lut = build_pow_table(gamma, 4096); 1.504 + *output_gamma_lut_length = 4096; 1.505 + } else { 1.506 + //XXX: the choice of a minimum of 256 here is not backed by any theory, 1.507 + // measurement or data, however it is what lcms uses. 1.508 + *output_gamma_lut_length = trc->count; 1.509 + if (*output_gamma_lut_length < 256) 1.510 + *output_gamma_lut_length = 256; 1.511 + 1.512 + *output_gamma_lut = invert_lut(trc->data, trc->count, *output_gamma_lut_length); 1.513 + } 1.514 + } 1.515 + 1.516 +} 1.517 +