Tue, 06 Jan 2015 21:39:09 +0100
Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
1 /* vim: set ts=8 sw=8 noexpandtab: */
2 // qcms
3 // Copyright (C) 2009 Mozilla Corporation
4 // Copyright (C) 1998-2007 Marti Maria
5 //
6 // Permission is hereby granted, free of charge, to any person obtaining
7 // a copy of this software and associated documentation files (the "Software"),
8 // to deal in the Software without restriction, including without limitation
9 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 // and/or sell copies of the Software, and to permit persons to whom the Software
11 // is furnished to do so, subject to the following conditions:
12 //
13 // The above copyright notice and this permission notice shall be included in
14 // all copies or substantial portions of the Software.
15 //
16 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
18 // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 #include <stdlib.h>
25 #include <math.h>
26 #include <assert.h>
27 #include <string.h> //memcpy
28 #include "qcmsint.h"
29 #include "chain.h"
30 #include "matrix.h"
31 #include "transform_util.h"
33 /* for MSVC, GCC, Intel, and Sun compilers */
34 #if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_M_AMD64) || defined(__x86_64__) || defined(__x86_64)
35 #define X86
36 #endif /* _M_IX86 || __i386__ || __i386 || _M_AMD64 || __x86_64__ || __x86_64 */
38 /**
39 * AltiVec detection for PowerPC CPUs
40 * In case we have a method of detecting do the runtime detection.
41 * Otherwise statically choose the AltiVec path in case the compiler
42 * was told to build with AltiVec support.
43 */
44 #if (defined(__POWERPC__) || defined(__powerpc__))
45 #if defined(__linux__)
46 #include <unistd.h>
47 #include <fcntl.h>
48 #include <stdio.h>
49 #include <elf.h>
50 #include <linux/auxvec.h>
51 #include <asm/cputable.h>
52 #include <link.h>
54 static inline qcms_bool have_altivec() {
55 static int available = -1;
56 int new_avail = 0;
57 ElfW(auxv_t) auxv;
58 ssize_t count;
59 int fd, i;
61 if (available != -1)
62 return (available != 0 ? true : false);
64 fd = open("/proc/self/auxv", O_RDONLY);
65 if (fd < 0)
66 goto out;
67 do {
68 count = read(fd, &auxv, sizeof(auxv));
69 if (count < 0)
70 goto out_close;
72 if (auxv.a_type == AT_HWCAP) {
73 new_avail = !!(auxv.a_un.a_val & PPC_FEATURE_HAS_ALTIVEC);
74 goto out_close;
75 }
76 } while (auxv.a_type != AT_NULL);
78 out_close:
79 close(fd);
80 out:
81 available = new_avail;
82 return (available != 0 ? true : false);
83 }
84 #elif defined(__APPLE__) && defined(__MACH__)
85 #include <sys/sysctl.h>
87 /**
88 * rip-off from ffmpeg AltiVec detection code.
89 * this code also appears on Apple's AltiVec pages.
90 */
91 static inline qcms_bool have_altivec() {
92 int sels[2] = {CTL_HW, HW_VECTORUNIT};
93 static int available = -1;
94 size_t len = sizeof(available);
95 int err;
97 if (available != -1)
98 return (available != 0 ? true : false);
100 err = sysctl(sels, 2, &available, &len, NULL, 0);
102 if (err == 0)
103 if (available != 0)
104 return true;
106 return false;
107 }
108 #elif defined(__ALTIVEC__) || defined(__APPLE_ALTIVEC__)
109 #define have_altivec() true
110 #else
111 #define have_altivec() false
112 #endif
113 #endif // (defined(__POWERPC__) || defined(__powerpc__))
115 // Build a White point, primary chromas transfer matrix from RGB to CIE XYZ
116 // This is just an approximation, I am not handling all the non-linear
117 // aspects of the RGB to XYZ process, and assumming that the gamma correction
118 // has transitive property in the tranformation chain.
119 //
120 // the alghoritm:
121 //
122 // - First I build the absolute conversion matrix using
123 // primaries in XYZ. This matrix is next inverted
124 // - Then I eval the source white point across this matrix
125 // obtaining the coeficients of the transformation
126 // - Then, I apply these coeficients to the original matrix
127 static struct matrix build_RGB_to_XYZ_transfer_matrix(qcms_CIE_xyY white, qcms_CIE_xyYTRIPLE primrs)
128 {
129 struct matrix primaries;
130 struct matrix primaries_invert;
131 struct matrix result;
132 struct vector white_point;
133 struct vector coefs;
135 double xn, yn;
136 double xr, yr;
137 double xg, yg;
138 double xb, yb;
140 xn = white.x;
141 yn = white.y;
143 if (yn == 0.0)
144 return matrix_invalid();
146 xr = primrs.red.x;
147 yr = primrs.red.y;
148 xg = primrs.green.x;
149 yg = primrs.green.y;
150 xb = primrs.blue.x;
151 yb = primrs.blue.y;
153 primaries.m[0][0] = xr;
154 primaries.m[0][1] = xg;
155 primaries.m[0][2] = xb;
157 primaries.m[1][0] = yr;
158 primaries.m[1][1] = yg;
159 primaries.m[1][2] = yb;
161 primaries.m[2][0] = 1 - xr - yr;
162 primaries.m[2][1] = 1 - xg - yg;
163 primaries.m[2][2] = 1 - xb - yb;
164 primaries.invalid = false;
166 white_point.v[0] = xn/yn;
167 white_point.v[1] = 1.;
168 white_point.v[2] = (1.0-xn-yn)/yn;
170 primaries_invert = matrix_invert(primaries);
172 coefs = matrix_eval(primaries_invert, white_point);
174 result.m[0][0] = coefs.v[0]*xr;
175 result.m[0][1] = coefs.v[1]*xg;
176 result.m[0][2] = coefs.v[2]*xb;
178 result.m[1][0] = coefs.v[0]*yr;
179 result.m[1][1] = coefs.v[1]*yg;
180 result.m[1][2] = coefs.v[2]*yb;
182 result.m[2][0] = coefs.v[0]*(1.-xr-yr);
183 result.m[2][1] = coefs.v[1]*(1.-xg-yg);
184 result.m[2][2] = coefs.v[2]*(1.-xb-yb);
185 result.invalid = primaries_invert.invalid;
187 return result;
188 }
190 struct CIE_XYZ {
191 double X;
192 double Y;
193 double Z;
194 };
196 /* CIE Illuminant D50 */
197 static const struct CIE_XYZ D50_XYZ = {
198 0.9642,
199 1.0000,
200 0.8249
201 };
203 /* from lcms: xyY2XYZ()
204 * corresponds to argyll: icmYxy2XYZ() */
205 static struct CIE_XYZ xyY2XYZ(qcms_CIE_xyY source)
206 {
207 struct CIE_XYZ dest;
208 dest.X = (source.x / source.y) * source.Y;
209 dest.Y = source.Y;
210 dest.Z = ((1 - source.x - source.y) / source.y) * source.Y;
211 return dest;
212 }
214 /* from lcms: ComputeChromaticAdaption */
215 // Compute chromatic adaption matrix using chad as cone matrix
216 static struct matrix
217 compute_chromatic_adaption(struct CIE_XYZ source_white_point,
218 struct CIE_XYZ dest_white_point,
219 struct matrix chad)
220 {
221 struct matrix chad_inv;
222 struct vector cone_source_XYZ, cone_source_rgb;
223 struct vector cone_dest_XYZ, cone_dest_rgb;
224 struct matrix cone, tmp;
226 tmp = chad;
227 chad_inv = matrix_invert(tmp);
229 cone_source_XYZ.v[0] = source_white_point.X;
230 cone_source_XYZ.v[1] = source_white_point.Y;
231 cone_source_XYZ.v[2] = source_white_point.Z;
233 cone_dest_XYZ.v[0] = dest_white_point.X;
234 cone_dest_XYZ.v[1] = dest_white_point.Y;
235 cone_dest_XYZ.v[2] = dest_white_point.Z;
237 cone_source_rgb = matrix_eval(chad, cone_source_XYZ);
238 cone_dest_rgb = matrix_eval(chad, cone_dest_XYZ);
240 cone.m[0][0] = cone_dest_rgb.v[0]/cone_source_rgb.v[0];
241 cone.m[0][1] = 0;
242 cone.m[0][2] = 0;
243 cone.m[1][0] = 0;
244 cone.m[1][1] = cone_dest_rgb.v[1]/cone_source_rgb.v[1];
245 cone.m[1][2] = 0;
246 cone.m[2][0] = 0;
247 cone.m[2][1] = 0;
248 cone.m[2][2] = cone_dest_rgb.v[2]/cone_source_rgb.v[2];
249 cone.invalid = false;
251 // Normalize
252 return matrix_multiply(chad_inv, matrix_multiply(cone, chad));
253 }
255 /* from lcms: cmsAdaptionMatrix */
256 // Returns the final chrmatic adaptation from illuminant FromIll to Illuminant ToIll
257 // Bradford is assumed
258 static struct matrix
259 adaption_matrix(struct CIE_XYZ source_illumination, struct CIE_XYZ target_illumination)
260 {
261 struct matrix lam_rigg = {{ // Bradford matrix
262 { 0.8951, 0.2664, -0.1614 },
263 { -0.7502, 1.7135, 0.0367 },
264 { 0.0389, -0.0685, 1.0296 }
265 }};
266 return compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg);
267 }
269 /* from lcms: cmsAdaptMatrixToD50 */
270 static struct matrix adapt_matrix_to_D50(struct matrix r, qcms_CIE_xyY source_white_pt)
271 {
272 struct CIE_XYZ Dn;
273 struct matrix Bradford;
275 if (source_white_pt.y == 0.0)
276 return matrix_invalid();
278 Dn = xyY2XYZ(source_white_pt);
280 Bradford = adaption_matrix(Dn, D50_XYZ);
281 return matrix_multiply(Bradford, r);
282 }
284 qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcms_CIE_xyYTRIPLE primaries)
285 {
286 struct matrix colorants;
287 colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries);
288 colorants = adapt_matrix_to_D50(colorants, white_point);
290 if (colorants.invalid)
291 return false;
293 /* note: there's a transpose type of operation going on here */
294 profile->redColorant.X = double_to_s15Fixed16Number(colorants.m[0][0]);
295 profile->redColorant.Y = double_to_s15Fixed16Number(colorants.m[1][0]);
296 profile->redColorant.Z = double_to_s15Fixed16Number(colorants.m[2][0]);
298 profile->greenColorant.X = double_to_s15Fixed16Number(colorants.m[0][1]);
299 profile->greenColorant.Y = double_to_s15Fixed16Number(colorants.m[1][1]);
300 profile->greenColorant.Z = double_to_s15Fixed16Number(colorants.m[2][1]);
302 profile->blueColorant.X = double_to_s15Fixed16Number(colorants.m[0][2]);
303 profile->blueColorant.Y = double_to_s15Fixed16Number(colorants.m[1][2]);
304 profile->blueColorant.Z = double_to_s15Fixed16Number(colorants.m[2][2]);
306 return true;
307 }
309 qcms_bool get_rgb_colorants(struct matrix *colorants, qcms_CIE_xyY white_point, qcms_CIE_xyYTRIPLE primaries)
310 {
311 *colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries);
312 *colorants = adapt_matrix_to_D50(*colorants, white_point);
314 return (colorants->invalid ? true : false);
315 }
317 #if 0
318 static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
319 {
320 int i;
321 float (*mat)[4] = transform->matrix;
322 for (i=0; i<length; i++) {
323 unsigned char device_r = *src++;
324 unsigned char device_g = *src++;
325 unsigned char device_b = *src++;
327 float linear_r = transform->input_gamma_table_r[device_r];
328 float linear_g = transform->input_gamma_table_g[device_g];
329 float linear_b = transform->input_gamma_table_b[device_b];
331 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
332 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
333 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
335 float out_device_r = pow(out_linear_r, transform->out_gamma_r);
336 float out_device_g = pow(out_linear_g, transform->out_gamma_g);
337 float out_device_b = pow(out_linear_b, transform->out_gamma_b);
339 dest[OUTPUT_R_INDEX] = clamp_u8(255*out_device_r);
340 dest[OUTPUT_G_INDEX] = clamp_u8(255*out_device_g);
341 dest[OUTPUT_B_INDEX] = clamp_u8(255*out_device_b);
342 dest += RGB_OUTPUT_COMPONENTS;
343 }
344 }
345 #endif
347 static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
348 {
349 unsigned int i;
350 for (i = 0; i < length; i++) {
351 float out_device_r, out_device_g, out_device_b;
352 unsigned char device = *src++;
354 float linear = transform->input_gamma_table_gray[device];
356 out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
357 out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
358 out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
360 dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255);
361 dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255);
362 dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255);
363 dest += RGB_OUTPUT_COMPONENTS;
364 }
365 }
367 /* Alpha is not corrected.
368 A rationale for this is found in Alvy Ray's "Should Alpha Be Nonlinear If
369 RGB Is?" Tech Memo 17 (December 14, 1998).
370 See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf
371 */
373 static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
374 {
375 unsigned int i;
376 for (i = 0; i < length; i++) {
377 float out_device_r, out_device_g, out_device_b;
378 unsigned char device = *src++;
379 unsigned char alpha = *src++;
381 float linear = transform->input_gamma_table_gray[device];
383 out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
384 out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
385 out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
387 dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255);
388 dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255);
389 dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255);
390 dest[OUTPUT_A_INDEX] = alpha;
391 dest += RGBA_OUTPUT_COMPONENTS;
392 }
393 }
396 static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
397 {
398 unsigned int i;
399 for (i = 0; i < length; i++) {
400 unsigned char device = *src++;
401 uint16_t gray;
403 float linear = transform->input_gamma_table_gray[device];
405 /* we could round here... */
406 gray = linear * PRECACHE_OUTPUT_MAX;
408 dest[OUTPUT_R_INDEX] = transform->output_table_r->data[gray];
409 dest[OUTPUT_G_INDEX] = transform->output_table_g->data[gray];
410 dest[OUTPUT_B_INDEX] = transform->output_table_b->data[gray];
411 dest += RGB_OUTPUT_COMPONENTS;
412 }
413 }
415 static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
416 {
417 unsigned int i;
418 for (i = 0; i < length; i++) {
419 unsigned char device = *src++;
420 unsigned char alpha = *src++;
421 uint16_t gray;
423 float linear = transform->input_gamma_table_gray[device];
425 /* we could round here... */
426 gray = linear * PRECACHE_OUTPUT_MAX;
428 dest[OUTPUT_R_INDEX] = transform->output_table_r->data[gray];
429 dest[OUTPUT_G_INDEX] = transform->output_table_g->data[gray];
430 dest[OUTPUT_B_INDEX] = transform->output_table_b->data[gray];
431 dest[OUTPUT_A_INDEX] = alpha;
432 dest += RGBA_OUTPUT_COMPONENTS;
433 }
434 }
436 static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
437 {
438 unsigned int i;
439 float (*mat)[4] = transform->matrix;
440 for (i = 0; i < length; i++) {
441 unsigned char device_r = *src++;
442 unsigned char device_g = *src++;
443 unsigned char device_b = *src++;
444 uint16_t r, g, b;
446 float linear_r = transform->input_gamma_table_r[device_r];
447 float linear_g = transform->input_gamma_table_g[device_g];
448 float linear_b = transform->input_gamma_table_b[device_b];
450 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
451 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
452 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
454 out_linear_r = clamp_float(out_linear_r);
455 out_linear_g = clamp_float(out_linear_g);
456 out_linear_b = clamp_float(out_linear_b);
458 /* we could round here... */
459 r = out_linear_r * PRECACHE_OUTPUT_MAX;
460 g = out_linear_g * PRECACHE_OUTPUT_MAX;
461 b = out_linear_b * PRECACHE_OUTPUT_MAX;
463 dest[OUTPUT_R_INDEX] = transform->output_table_r->data[r];
464 dest[OUTPUT_G_INDEX] = transform->output_table_g->data[g];
465 dest[OUTPUT_B_INDEX] = transform->output_table_b->data[b];
466 dest += RGB_OUTPUT_COMPONENTS;
467 }
468 }
470 static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
471 {
472 unsigned int i;
473 float (*mat)[4] = transform->matrix;
474 for (i = 0; i < length; i++) {
475 unsigned char device_r = *src++;
476 unsigned char device_g = *src++;
477 unsigned char device_b = *src++;
478 unsigned char alpha = *src++;
479 uint16_t r, g, b;
481 float linear_r = transform->input_gamma_table_r[device_r];
482 float linear_g = transform->input_gamma_table_g[device_g];
483 float linear_b = transform->input_gamma_table_b[device_b];
485 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
486 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
487 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
489 out_linear_r = clamp_float(out_linear_r);
490 out_linear_g = clamp_float(out_linear_g);
491 out_linear_b = clamp_float(out_linear_b);
493 /* we could round here... */
494 r = out_linear_r * PRECACHE_OUTPUT_MAX;
495 g = out_linear_g * PRECACHE_OUTPUT_MAX;
496 b = out_linear_b * PRECACHE_OUTPUT_MAX;
498 dest[OUTPUT_R_INDEX] = transform->output_table_r->data[r];
499 dest[OUTPUT_G_INDEX] = transform->output_table_g->data[g];
500 dest[OUTPUT_B_INDEX] = transform->output_table_b->data[b];
501 dest[OUTPUT_A_INDEX] = alpha;
502 dest += RGBA_OUTPUT_COMPONENTS;
503 }
504 }
506 // Not used
507 /*
508 static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
509 unsigned int i;
510 int xy_len = 1;
511 int x_len = transform->grid_size;
512 int len = x_len * x_len;
513 float* r_table = transform->r_clut;
514 float* g_table = transform->g_clut;
515 float* b_table = transform->b_clut;
517 for (i = 0; i < length; i++) {
518 unsigned char in_r = *src++;
519 unsigned char in_g = *src++;
520 unsigned char in_b = *src++;
521 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
523 int x = floorf(linear_r * (transform->grid_size-1));
524 int y = floorf(linear_g * (transform->grid_size-1));
525 int z = floorf(linear_b * (transform->grid_size-1));
526 int x_n = ceilf(linear_r * (transform->grid_size-1));
527 int y_n = ceilf(linear_g * (transform->grid_size-1));
528 int z_n = ceilf(linear_b * (transform->grid_size-1));
529 float x_d = linear_r * (transform->grid_size-1) - x;
530 float y_d = linear_g * (transform->grid_size-1) - y;
531 float z_d = linear_b * (transform->grid_size-1) - z;
533 float r_x1 = lerp(CLU(r_table,x,y,z), CLU(r_table,x_n,y,z), x_d);
534 float r_x2 = lerp(CLU(r_table,x,y_n,z), CLU(r_table,x_n,y_n,z), x_d);
535 float r_y1 = lerp(r_x1, r_x2, y_d);
536 float r_x3 = lerp(CLU(r_table,x,y,z_n), CLU(r_table,x_n,y,z_n), x_d);
537 float r_x4 = lerp(CLU(r_table,x,y_n,z_n), CLU(r_table,x_n,y_n,z_n), x_d);
538 float r_y2 = lerp(r_x3, r_x4, y_d);
539 float clut_r = lerp(r_y1, r_y2, z_d);
541 float g_x1 = lerp(CLU(g_table,x,y,z), CLU(g_table,x_n,y,z), x_d);
542 float g_x2 = lerp(CLU(g_table,x,y_n,z), CLU(g_table,x_n,y_n,z), x_d);
543 float g_y1 = lerp(g_x1, g_x2, y_d);
544 float g_x3 = lerp(CLU(g_table,x,y,z_n), CLU(g_table,x_n,y,z_n), x_d);
545 float g_x4 = lerp(CLU(g_table,x,y_n,z_n), CLU(g_table,x_n,y_n,z_n), x_d);
546 float g_y2 = lerp(g_x3, g_x4, y_d);
547 float clut_g = lerp(g_y1, g_y2, z_d);
549 float b_x1 = lerp(CLU(b_table,x,y,z), CLU(b_table,x_n,y,z), x_d);
550 float b_x2 = lerp(CLU(b_table,x,y_n,z), CLU(b_table,x_n,y_n,z), x_d);
551 float b_y1 = lerp(b_x1, b_x2, y_d);
552 float b_x3 = lerp(CLU(b_table,x,y,z_n), CLU(b_table,x_n,y,z_n), x_d);
553 float b_x4 = lerp(CLU(b_table,x,y_n,z_n), CLU(b_table,x_n,y_n,z_n), x_d);
554 float b_y2 = lerp(b_x3, b_x4, y_d);
555 float clut_b = lerp(b_y1, b_y2, z_d);
557 *dest++ = clamp_u8(clut_r*255.0f);
558 *dest++ = clamp_u8(clut_g*255.0f);
559 *dest++ = clamp_u8(clut_b*255.0f);
560 }
561 }
562 */
564 static int int_div_ceil(int value, int div) {
565 return ((value + div - 1) / div);
566 }
568 // Using lcms' tetra interpolation algorithm.
569 static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
570 unsigned int i;
571 int xy_len = 1;
572 int x_len = transform->grid_size;
573 int len = x_len * x_len;
574 float* r_table = transform->r_clut;
575 float* g_table = transform->g_clut;
576 float* b_table = transform->b_clut;
577 float c0_r, c1_r, c2_r, c3_r;
578 float c0_g, c1_g, c2_g, c3_g;
579 float c0_b, c1_b, c2_b, c3_b;
580 float clut_r, clut_g, clut_b;
581 for (i = 0; i < length; i++) {
582 unsigned char in_r = *src++;
583 unsigned char in_g = *src++;
584 unsigned char in_b = *src++;
585 unsigned char in_a = *src++;
586 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
588 int x = in_r * (transform->grid_size-1) / 255;
589 int y = in_g * (transform->grid_size-1) / 255;
590 int z = in_b * (transform->grid_size-1) / 255;
591 int x_n = int_div_ceil(in_r * (transform->grid_size-1), 255);
592 int y_n = int_div_ceil(in_g * (transform->grid_size-1), 255);
593 int z_n = int_div_ceil(in_b * (transform->grid_size-1), 255);
594 float rx = linear_r * (transform->grid_size-1) - x;
595 float ry = linear_g * (transform->grid_size-1) - y;
596 float rz = linear_b * (transform->grid_size-1) - z;
598 c0_r = CLU(r_table, x, y, z);
599 c0_g = CLU(g_table, x, y, z);
600 c0_b = CLU(b_table, x, y, z);
602 if( rx >= ry ) {
603 if (ry >= rz) { //rx >= ry && ry >= rz
604 c1_r = CLU(r_table, x_n, y, z) - c0_r;
605 c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);
606 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
607 c1_g = CLU(g_table, x_n, y, z) - c0_g;
608 c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);
609 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
610 c1_b = CLU(b_table, x_n, y, z) - c0_b;
611 c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);
612 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
613 } else {
614 if (rx >= rz) { //rx >= rz && rz >= ry
615 c1_r = CLU(r_table, x_n, y, z) - c0_r;
616 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
617 c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z);
618 c1_g = CLU(g_table, x_n, y, z) - c0_g;
619 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
620 c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z);
621 c1_b = CLU(b_table, x_n, y, z) - c0_b;
622 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
623 c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z);
624 } else { //rz > rx && rx >= ry
625 c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n);
626 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
627 c3_r = CLU(r_table, x, y, z_n) - c0_r;
628 c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n);
629 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
630 c3_g = CLU(g_table, x, y, z_n) - c0_g;
631 c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n);
632 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
633 c3_b = CLU(b_table, x, y, z_n) - c0_b;
634 }
635 }
636 } else {
637 if (rx >= rz) { //ry > rx && rx >= rz
638 c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);
639 c2_r = CLU(r_table, x, y_n, z) - c0_r;
640 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
641 c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);
642 c2_g = CLU(g_table, x, y_n, z) - c0_g;
643 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
644 c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);
645 c2_b = CLU(b_table, x, y_n, z) - c0_b;
646 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
647 } else {
648 if (ry >= rz) { //ry >= rz && rz > rx
649 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
650 c2_r = CLU(r_table, x, y_n, z) - c0_r;
651 c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z);
652 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
653 c2_g = CLU(g_table, x, y_n, z) - c0_g;
654 c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z);
655 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
656 c2_b = CLU(b_table, x, y_n, z) - c0_b;
657 c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z);
658 } else { //rz > ry && ry > rx
659 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
660 c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n);
661 c3_r = CLU(r_table, x, y, z_n) - c0_r;
662 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
663 c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n);
664 c3_g = CLU(g_table, x, y, z_n) - c0_g;
665 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
666 c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n);
667 c3_b = CLU(b_table, x, y, z_n) - c0_b;
668 }
669 }
670 }
672 clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz;
673 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
674 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
676 dest[OUTPUT_R_INDEX] = clamp_u8(clut_r*255.0f);
677 dest[OUTPUT_G_INDEX] = clamp_u8(clut_g*255.0f);
678 dest[OUTPUT_B_INDEX] = clamp_u8(clut_b*255.0f);
679 dest[OUTPUT_A_INDEX] = in_a;
680 dest += RGBA_OUTPUT_COMPONENTS;
681 }
682 }
684 // Using lcms' tetra interpolation code.
685 static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
686 unsigned int i;
687 int xy_len = 1;
688 int x_len = transform->grid_size;
689 int len = x_len * x_len;
690 float* r_table = transform->r_clut;
691 float* g_table = transform->g_clut;
692 float* b_table = transform->b_clut;
693 float c0_r, c1_r, c2_r, c3_r;
694 float c0_g, c1_g, c2_g, c3_g;
695 float c0_b, c1_b, c2_b, c3_b;
696 float clut_r, clut_g, clut_b;
697 for (i = 0; i < length; i++) {
698 unsigned char in_r = *src++;
699 unsigned char in_g = *src++;
700 unsigned char in_b = *src++;
701 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
703 int x = in_r * (transform->grid_size-1) / 255;
704 int y = in_g * (transform->grid_size-1) / 255;
705 int z = in_b * (transform->grid_size-1) / 255;
706 int x_n = int_div_ceil(in_r * (transform->grid_size-1), 255);
707 int y_n = int_div_ceil(in_g * (transform->grid_size-1), 255);
708 int z_n = int_div_ceil(in_b * (transform->grid_size-1), 255);
709 float rx = linear_r * (transform->grid_size-1) - x;
710 float ry = linear_g * (transform->grid_size-1) - y;
711 float rz = linear_b * (transform->grid_size-1) - z;
713 c0_r = CLU(r_table, x, y, z);
714 c0_g = CLU(g_table, x, y, z);
715 c0_b = CLU(b_table, x, y, z);
717 if( rx >= ry ) {
718 if (ry >= rz) { //rx >= ry && ry >= rz
719 c1_r = CLU(r_table, x_n, y, z) - c0_r;
720 c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);
721 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
722 c1_g = CLU(g_table, x_n, y, z) - c0_g;
723 c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);
724 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
725 c1_b = CLU(b_table, x_n, y, z) - c0_b;
726 c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);
727 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
728 } else {
729 if (rx >= rz) { //rx >= rz && rz >= ry
730 c1_r = CLU(r_table, x_n, y, z) - c0_r;
731 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
732 c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z);
733 c1_g = CLU(g_table, x_n, y, z) - c0_g;
734 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
735 c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z);
736 c1_b = CLU(b_table, x_n, y, z) - c0_b;
737 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
738 c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z);
739 } else { //rz > rx && rx >= ry
740 c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n);
741 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
742 c3_r = CLU(r_table, x, y, z_n) - c0_r;
743 c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n);
744 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
745 c3_g = CLU(g_table, x, y, z_n) - c0_g;
746 c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n);
747 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
748 c3_b = CLU(b_table, x, y, z_n) - c0_b;
749 }
750 }
751 } else {
752 if (rx >= rz) { //ry > rx && rx >= rz
753 c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);
754 c2_r = CLU(r_table, x, y_n, z) - c0_r;
755 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
756 c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);
757 c2_g = CLU(g_table, x, y_n, z) - c0_g;
758 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
759 c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);
760 c2_b = CLU(b_table, x, y_n, z) - c0_b;
761 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
762 } else {
763 if (ry >= rz) { //ry >= rz && rz > rx
764 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
765 c2_r = CLU(r_table, x, y_n, z) - c0_r;
766 c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z);
767 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
768 c2_g = CLU(g_table, x, y_n, z) - c0_g;
769 c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z);
770 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
771 c2_b = CLU(b_table, x, y_n, z) - c0_b;
772 c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z);
773 } else { //rz > ry && ry > rx
774 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
775 c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n);
776 c3_r = CLU(r_table, x, y, z_n) - c0_r;
777 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
778 c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n);
779 c3_g = CLU(g_table, x, y, z_n) - c0_g;
780 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
781 c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n);
782 c3_b = CLU(b_table, x, y, z_n) - c0_b;
783 }
784 }
785 }
787 clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz;
788 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
789 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
791 dest[OUTPUT_R_INDEX] = clamp_u8(clut_r*255.0f);
792 dest[OUTPUT_G_INDEX] = clamp_u8(clut_g*255.0f);
793 dest[OUTPUT_B_INDEX] = clamp_u8(clut_b*255.0f);
794 dest += RGB_OUTPUT_COMPONENTS;
795 }
796 }
798 static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
799 {
800 unsigned int i;
801 float (*mat)[4] = transform->matrix;
802 for (i = 0; i < length; i++) {
803 unsigned char device_r = *src++;
804 unsigned char device_g = *src++;
805 unsigned char device_b = *src++;
806 float out_device_r, out_device_g, out_device_b;
808 float linear_r = transform->input_gamma_table_r[device_r];
809 float linear_g = transform->input_gamma_table_g[device_g];
810 float linear_b = transform->input_gamma_table_b[device_b];
812 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
813 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
814 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
816 out_linear_r = clamp_float(out_linear_r);
817 out_linear_g = clamp_float(out_linear_g);
818 out_linear_b = clamp_float(out_linear_b);
820 out_device_r = lut_interp_linear(out_linear_r,
821 transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
822 out_device_g = lut_interp_linear(out_linear_g,
823 transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
824 out_device_b = lut_interp_linear(out_linear_b,
825 transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
827 dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255);
828 dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255);
829 dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255);
830 dest += RGB_OUTPUT_COMPONENTS;
831 }
832 }
834 static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
835 {
836 unsigned int i;
837 float (*mat)[4] = transform->matrix;
838 for (i = 0; i < length; i++) {
839 unsigned char device_r = *src++;
840 unsigned char device_g = *src++;
841 unsigned char device_b = *src++;
842 unsigned char alpha = *src++;
843 float out_device_r, out_device_g, out_device_b;
845 float linear_r = transform->input_gamma_table_r[device_r];
846 float linear_g = transform->input_gamma_table_g[device_g];
847 float linear_b = transform->input_gamma_table_b[device_b];
849 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
850 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
851 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
853 out_linear_r = clamp_float(out_linear_r);
854 out_linear_g = clamp_float(out_linear_g);
855 out_linear_b = clamp_float(out_linear_b);
857 out_device_r = lut_interp_linear(out_linear_r,
858 transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
859 out_device_g = lut_interp_linear(out_linear_g,
860 transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
861 out_device_b = lut_interp_linear(out_linear_b,
862 transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
864 dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255);
865 dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255);
866 dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255);
867 dest[OUTPUT_A_INDEX] = alpha;
868 dest += RGBA_OUTPUT_COMPONENTS;
869 }
870 }
872 #if 0
873 static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
874 {
875 int i;
876 float (*mat)[4] = transform->matrix;
877 for (i = 0; i < length; i++) {
878 unsigned char device_r = *src++;
879 unsigned char device_g = *src++;
880 unsigned char device_b = *src++;
882 float linear_r = transform->input_gamma_table_r[device_r];
883 float linear_g = transform->input_gamma_table_g[device_g];
884 float linear_b = transform->input_gamma_table_b[device_b];
886 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
887 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
888 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
890 *dest++ = clamp_u8(out_linear_r*255);
891 *dest++ = clamp_u8(out_linear_g*255);
892 *dest++ = clamp_u8(out_linear_b*255);
893 }
894 }
895 #endif
897 /*
898 * If users create and destroy objects on different threads, even if the same
899 * objects aren't used on different threads at the same time, we can still run
900 * in to trouble with refcounts if they aren't atomic.
901 *
902 * This can lead to us prematurely deleting the precache if threads get unlucky
903 * and write the wrong value to the ref count.
904 */
905 static struct precache_output *precache_reference(struct precache_output *p)
906 {
907 qcms_atomic_increment(p->ref_count);
908 return p;
909 }
911 static struct precache_output *precache_create()
912 {
913 struct precache_output *p = malloc(sizeof(struct precache_output));
914 if (p)
915 p->ref_count = 1;
916 return p;
917 }
919 void precache_release(struct precache_output *p)
920 {
921 if (qcms_atomic_decrement(p->ref_count) == 0) {
922 free(p);
923 }
924 }
926 #ifdef HAS_POSIX_MEMALIGN
927 static qcms_transform *transform_alloc(void)
928 {
929 qcms_transform *t;
930 if (!posix_memalign(&t, 16, sizeof(*t))) {
931 return t;
932 } else {
933 return NULL;
934 }
935 }
936 static void transform_free(qcms_transform *t)
937 {
938 free(t);
939 }
940 #else
941 static qcms_transform *transform_alloc(void)
942 {
943 /* transform needs to be aligned on a 16byte boundrary */
944 char *original_block = calloc(sizeof(qcms_transform) + sizeof(void*) + 16, 1);
945 /* make room for a pointer to the block returned by calloc */
946 void *transform_start = original_block + sizeof(void*);
947 /* align transform_start */
948 qcms_transform *transform_aligned = (qcms_transform*)(((uintptr_t)transform_start + 15) & ~0xf);
950 /* store a pointer to the block returned by calloc so that we can free it later */
951 void **(original_block_ptr) = (void**)transform_aligned;
952 if (!original_block)
953 return NULL;
954 original_block_ptr--;
955 *original_block_ptr = original_block;
957 return transform_aligned;
958 }
959 static void transform_free(qcms_transform *t)
960 {
961 /* get at the pointer to the unaligned block returned by calloc */
962 void **p = (void**)t;
963 p--;
964 free(*p);
965 }
966 #endif
968 void qcms_transform_release(qcms_transform *t)
969 {
970 /* ensure we only free the gamma tables once even if there are
971 * multiple references to the same data */
973 if (t->output_table_r)
974 precache_release(t->output_table_r);
975 if (t->output_table_g)
976 precache_release(t->output_table_g);
977 if (t->output_table_b)
978 precache_release(t->output_table_b);
980 free(t->input_gamma_table_r);
981 if (t->input_gamma_table_g != t->input_gamma_table_r)
982 free(t->input_gamma_table_g);
983 if (t->input_gamma_table_g != t->input_gamma_table_r &&
984 t->input_gamma_table_g != t->input_gamma_table_b)
985 free(t->input_gamma_table_b);
987 free(t->input_gamma_table_gray);
989 free(t->output_gamma_lut_r);
990 free(t->output_gamma_lut_g);
991 free(t->output_gamma_lut_b);
993 transform_free(t);
994 }
996 #ifdef X86
997 // Determine if we can build with SSE2 (this was partly copied from jmorecfg.h in
998 // mozilla/jpeg)
999 // -------------------------------------------------------------------------
1000 #if defined(_M_IX86) && defined(_MSC_VER)
1001 #define HAS_CPUID
1002 /* Get us a CPUID function. Avoid clobbering EBX because sometimes it's the PIC
1003 register - I'm not sure if that ever happens on windows, but cpuid isn't
1004 on the critical path so we just preserve the register to be safe and to be
1005 consistent with the non-windows version. */
1006 static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
1007 uint32_t a_, b_, c_, d_;
1008 __asm {
1009 xchg ebx, esi
1010 mov eax, fxn
1011 cpuid
1012 mov a_, eax
1013 mov b_, ebx
1014 mov c_, ecx
1015 mov d_, edx
1016 xchg ebx, esi
1017 }
1018 *a = a_;
1019 *b = b_;
1020 *c = c_;
1021 *d = d_;
1022 }
1023 #elif (defined(__GNUC__) || defined(__SUNPRO_C)) && (defined(__i386__) || defined(__i386))
1024 #define HAS_CPUID
1025 /* Get us a CPUID function. We can't use ebx because it's the PIC register on
1026 some platforms, so we use ESI instead and save ebx to avoid clobbering it. */
1027 static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
1029 uint32_t a_, b_, c_, d_;
1030 __asm__ __volatile__ ("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi;"
1031 : "=a" (a_), "=S" (b_), "=c" (c_), "=d" (d_) : "a" (fxn));
1032 *a = a_;
1033 *b = b_;
1034 *c = c_;
1035 *d = d_;
1036 }
1037 #endif
1039 // -------------------------Runtime SSEx Detection-----------------------------
1041 /* MMX is always supported per
1042 * Gecko v1.9.1 minimum CPU requirements */
1043 #define SSE1_EDX_MASK (1UL << 25)
1044 #define SSE2_EDX_MASK (1UL << 26)
1045 #define SSE3_ECX_MASK (1UL << 0)
1047 static int sse_version_available(void)
1048 {
1049 #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
1050 /* we know at build time that 64-bit CPUs always have SSE2
1051 * this tells the compiler that non-SSE2 branches will never be
1052 * taken (i.e. OK to optimze away the SSE1 and non-SIMD code */
1053 return 2;
1054 #elif defined(HAS_CPUID)
1055 static int sse_version = -1;
1056 uint32_t a, b, c, d;
1057 uint32_t function = 0x00000001;
1059 if (sse_version == -1) {
1060 sse_version = 0;
1061 cpuid(function, &a, &b, &c, &d);
1062 if (c & SSE3_ECX_MASK)
1063 sse_version = 3;
1064 else if (d & SSE2_EDX_MASK)
1065 sse_version = 2;
1066 else if (d & SSE1_EDX_MASK)
1067 sse_version = 1;
1068 }
1070 return sse_version;
1071 #else
1072 return 0;
1073 #endif
1074 }
1075 #endif
1077 static const struct matrix bradford_matrix = {{ { 0.8951f, 0.2664f,-0.1614f},
1078 {-0.7502f, 1.7135f, 0.0367f},
1079 { 0.0389f,-0.0685f, 1.0296f}},
1080 false};
1082 static const struct matrix bradford_matrix_inv = {{ { 0.9869929f,-0.1470543f, 0.1599627f},
1083 { 0.4323053f, 0.5183603f, 0.0492912f},
1084 {-0.0085287f, 0.0400428f, 0.9684867f}},
1085 false};
1087 // See ICCv4 E.3
1088 struct matrix compute_whitepoint_adaption(float X, float Y, float Z) {
1089 float p = (0.96422f*bradford_matrix.m[0][0] + 1.000f*bradford_matrix.m[1][0] + 0.82521f*bradford_matrix.m[2][0]) /
1090 (X*bradford_matrix.m[0][0] + Y*bradford_matrix.m[1][0] + Z*bradford_matrix.m[2][0] );
1091 float y = (0.96422f*bradford_matrix.m[0][1] + 1.000f*bradford_matrix.m[1][1] + 0.82521f*bradford_matrix.m[2][1]) /
1092 (X*bradford_matrix.m[0][1] + Y*bradford_matrix.m[1][1] + Z*bradford_matrix.m[2][1] );
1093 float b = (0.96422f*bradford_matrix.m[0][2] + 1.000f*bradford_matrix.m[1][2] + 0.82521f*bradford_matrix.m[2][2]) /
1094 (X*bradford_matrix.m[0][2] + Y*bradford_matrix.m[1][2] + Z*bradford_matrix.m[2][2] );
1095 struct matrix white_adaption = {{ {p,0,0}, {0,y,0}, {0,0,b}}, false};
1096 return matrix_multiply( bradford_matrix_inv, matrix_multiply(white_adaption, bradford_matrix) );
1097 }
1099 void qcms_profile_precache_output_transform(qcms_profile *profile)
1100 {
1101 /* we only support precaching on rgb profiles */
1102 if (profile->color_space != RGB_SIGNATURE)
1103 return;
1105 if (qcms_supports_iccv4) {
1106 /* don't precache since we will use the B2A LUT */
1107 if (profile->B2A0)
1108 return;
1110 /* don't precache since we will use the mBA LUT */
1111 if (profile->mBA)
1112 return;
1113 }
1115 /* don't precache if we do not have the TRC curves */
1116 if (!profile->redTRC || !profile->greenTRC || !profile->blueTRC)
1117 return;
1119 if (!profile->output_table_r) {
1120 profile->output_table_r = precache_create();
1121 if (profile->output_table_r &&
1122 !compute_precache(profile->redTRC, profile->output_table_r->data)) {
1123 precache_release(profile->output_table_r);
1124 profile->output_table_r = NULL;
1125 }
1126 }
1127 if (!profile->output_table_g) {
1128 profile->output_table_g = precache_create();
1129 if (profile->output_table_g &&
1130 !compute_precache(profile->greenTRC, profile->output_table_g->data)) {
1131 precache_release(profile->output_table_g);
1132 profile->output_table_g = NULL;
1133 }
1134 }
1135 if (!profile->output_table_b) {
1136 profile->output_table_b = precache_create();
1137 if (profile->output_table_b &&
1138 !compute_precache(profile->blueTRC, profile->output_table_b->data)) {
1139 precache_release(profile->output_table_b);
1140 profile->output_table_b = NULL;
1141 }
1142 }
1143 }
1145 /* Replace the current transformation with a LUT transformation using a given number of sample points */
1146 qcms_transform* qcms_transform_precacheLUT_float(qcms_transform *transform, qcms_profile *in, qcms_profile *out,
1147 int samples, qcms_data_type in_type)
1148 {
1149 /* The range between which 2 consecutive sample points can be used to interpolate */
1150 uint16_t x,y,z;
1151 uint32_t l;
1152 uint32_t lutSize = 3 * samples * samples * samples;
1153 float* src = NULL;
1154 float* dest = NULL;
1155 float* lut = NULL;
1157 src = malloc(lutSize*sizeof(float));
1158 dest = malloc(lutSize*sizeof(float));
1160 if (src && dest) {
1161 /* Prepare a list of points we want to sample */
1162 l = 0;
1163 for (x = 0; x < samples; x++) {
1164 for (y = 0; y < samples; y++) {
1165 for (z = 0; z < samples; z++) {
1166 src[l++] = x / (float)(samples-1);
1167 src[l++] = y / (float)(samples-1);
1168 src[l++] = z / (float)(samples-1);
1169 }
1170 }
1171 }
1173 lut = qcms_chain_transform(in, out, src, dest, lutSize);
1174 if (lut) {
1175 transform->r_clut = &lut[0];
1176 transform->g_clut = &lut[1];
1177 transform->b_clut = &lut[2];
1178 transform->grid_size = samples;
1179 if (in_type == QCMS_DATA_RGBA_8) {
1180 transform->transform_fn = qcms_transform_data_tetra_clut_rgba;
1181 } else {
1182 transform->transform_fn = qcms_transform_data_tetra_clut;
1183 }
1184 }
1185 }
1188 //XXX: qcms_modular_transform_data may return either the src or dest buffer. If so it must not be free-ed
1189 if (src && lut != src) {
1190 free(src);
1191 }
1192 if (dest && lut != dest) {
1193 free(dest);
1194 }
1196 if (lut == NULL) {
1197 return NULL;
1198 }
1199 return transform;
1200 }
1202 #define NO_MEM_TRANSFORM NULL
1204 qcms_transform* qcms_transform_create(
1205 qcms_profile *in, qcms_data_type in_type,
1206 qcms_profile *out, qcms_data_type out_type,
1207 qcms_intent intent)
1208 {
1209 bool precache = false;
1211 qcms_transform *transform = transform_alloc();
1212 if (!transform) {
1213 return NULL;
1214 }
1215 if (out_type != QCMS_DATA_RGB_8 &&
1216 out_type != QCMS_DATA_RGBA_8) {
1217 assert(0 && "output type");
1218 transform_free(transform);
1219 return NULL;
1220 }
1222 if (out->output_table_r &&
1223 out->output_table_g &&
1224 out->output_table_b) {
1225 precache = true;
1226 }
1228 // This precache assumes RGB_SIGNATURE (fails on GRAY_SIGNATURE, for instance)
1229 if (qcms_supports_iccv4 &&
1230 (in_type == QCMS_DATA_RGB_8 || in_type == QCMS_DATA_RGBA_8) &&
1231 (in->A2B0 || out->B2A0 || in->mAB || out->mAB))
1232 {
1233 // Precache the transformation to a CLUT 33x33x33 in size.
1234 // 33 is used by many profiles and works well in pratice.
1235 // This evenly divides 256 into blocks of 8x8x8.
1236 // TODO For transforming small data sets of about 200x200 or less
1237 // precaching should be avoided.
1238 qcms_transform *result = qcms_transform_precacheLUT_float(transform, in, out, 33, in_type);
1239 if (!result) {
1240 assert(0 && "precacheLUT failed");
1241 transform_free(transform);
1242 return NULL;
1243 }
1244 return result;
1245 }
1247 if (precache) {
1248 transform->output_table_r = precache_reference(out->output_table_r);
1249 transform->output_table_g = precache_reference(out->output_table_g);
1250 transform->output_table_b = precache_reference(out->output_table_b);
1251 } else {
1252 if (!out->redTRC || !out->greenTRC || !out->blueTRC) {
1253 qcms_transform_release(transform);
1254 return NO_MEM_TRANSFORM;
1255 }
1256 build_output_lut(out->redTRC, &transform->output_gamma_lut_r, &transform->output_gamma_lut_r_length);
1257 build_output_lut(out->greenTRC, &transform->output_gamma_lut_g, &transform->output_gamma_lut_g_length);
1258 build_output_lut(out->blueTRC, &transform->output_gamma_lut_b, &transform->output_gamma_lut_b_length);
1259 if (!transform->output_gamma_lut_r || !transform->output_gamma_lut_g || !transform->output_gamma_lut_b) {
1260 qcms_transform_release(transform);
1261 return NO_MEM_TRANSFORM;
1262 }
1263 }
1265 if (in->color_space == RGB_SIGNATURE) {
1266 struct matrix in_matrix, out_matrix, result;
1268 if (in_type != QCMS_DATA_RGB_8 &&
1269 in_type != QCMS_DATA_RGBA_8){
1270 assert(0 && "input type");
1271 transform_free(transform);
1272 return NULL;
1273 }
1274 if (precache) {
1275 #ifdef X86
1276 if (sse_version_available() >= 2) {
1277 if (in_type == QCMS_DATA_RGB_8)
1278 transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
1279 else
1280 transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;
1282 #if !(defined(_MSC_VER) && defined(_M_AMD64))
1283 /* Microsoft Compiler for x64 doesn't support MMX.
1284 * SSE code uses MMX so that we disable on x64 */
1285 } else
1286 if (sse_version_available() >= 1) {
1287 if (in_type == QCMS_DATA_RGB_8)
1288 transform->transform_fn = qcms_transform_data_rgb_out_lut_sse1;
1289 else
1290 transform->transform_fn = qcms_transform_data_rgba_out_lut_sse1;
1291 #endif
1292 } else
1293 #endif
1294 #if (defined(__POWERPC__) || defined(__powerpc__))
1295 if (have_altivec()) {
1296 if (in_type == QCMS_DATA_RGB_8)
1297 transform->transform_fn = qcms_transform_data_rgb_out_lut_altivec;
1298 else
1299 transform->transform_fn = qcms_transform_data_rgba_out_lut_altivec;
1300 } else
1301 #endif
1302 {
1303 if (in_type == QCMS_DATA_RGB_8)
1304 transform->transform_fn = qcms_transform_data_rgb_out_lut_precache;
1305 else
1306 transform->transform_fn = qcms_transform_data_rgba_out_lut_precache;
1307 }
1308 } else {
1309 if (in_type == QCMS_DATA_RGB_8)
1310 transform->transform_fn = qcms_transform_data_rgb_out_lut;
1311 else
1312 transform->transform_fn = qcms_transform_data_rgba_out_lut;
1313 }
1315 //XXX: avoid duplicating tables if we can
1316 transform->input_gamma_table_r = build_input_gamma_table(in->redTRC);
1317 transform->input_gamma_table_g = build_input_gamma_table(in->greenTRC);
1318 transform->input_gamma_table_b = build_input_gamma_table(in->blueTRC);
1319 if (!transform->input_gamma_table_r || !transform->input_gamma_table_g || !transform->input_gamma_table_b) {
1320 qcms_transform_release(transform);
1321 return NO_MEM_TRANSFORM;
1322 }
1325 /* build combined colorant matrix */
1326 in_matrix = build_colorant_matrix(in);
1327 out_matrix = build_colorant_matrix(out);
1328 out_matrix = matrix_invert(out_matrix);
1329 if (out_matrix.invalid) {
1330 qcms_transform_release(transform);
1331 return NULL;
1332 }
1333 result = matrix_multiply(out_matrix, in_matrix);
1335 /* store the results in column major mode
1336 * this makes doing the multiplication with sse easier */
1337 transform->matrix[0][0] = result.m[0][0];
1338 transform->matrix[1][0] = result.m[0][1];
1339 transform->matrix[2][0] = result.m[0][2];
1340 transform->matrix[0][1] = result.m[1][0];
1341 transform->matrix[1][1] = result.m[1][1];
1342 transform->matrix[2][1] = result.m[1][2];
1343 transform->matrix[0][2] = result.m[2][0];
1344 transform->matrix[1][2] = result.m[2][1];
1345 transform->matrix[2][2] = result.m[2][2];
1347 } else if (in->color_space == GRAY_SIGNATURE) {
1348 if (in_type != QCMS_DATA_GRAY_8 &&
1349 in_type != QCMS_DATA_GRAYA_8){
1350 assert(0 && "input type");
1351 transform_free(transform);
1352 return NULL;
1353 }
1355 transform->input_gamma_table_gray = build_input_gamma_table(in->grayTRC);
1356 if (!transform->input_gamma_table_gray) {
1357 qcms_transform_release(transform);
1358 return NO_MEM_TRANSFORM;
1359 }
1361 if (precache) {
1362 if (in_type == QCMS_DATA_GRAY_8) {
1363 transform->transform_fn = qcms_transform_data_gray_out_precache;
1364 } else {
1365 transform->transform_fn = qcms_transform_data_graya_out_precache;
1366 }
1367 } else {
1368 if (in_type == QCMS_DATA_GRAY_8) {
1369 transform->transform_fn = qcms_transform_data_gray_out_lut;
1370 } else {
1371 transform->transform_fn = qcms_transform_data_graya_out_lut;
1372 }
1373 }
1374 } else {
1375 assert(0 && "unexpected colorspace");
1376 transform_free(transform);
1377 return NULL;
1378 }
1379 return transform;
1380 }
1382 #if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
1383 /* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */
1384 __attribute__((__force_align_arg_pointer__))
1385 #endif
1386 void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length)
1387 {
1388 transform->transform_fn(transform, src, dest, length);
1389 }
1391 qcms_bool qcms_supports_iccv4;
1392 void qcms_enable_iccv4()
1393 {
1394 qcms_supports_iccv4 = true;
1395 }