|
1 /* vim: set ts=8 sw=8 noexpandtab: */ |
|
2 // qcms |
|
3 // Copyright (C) 2009 Mozilla Corporation |
|
4 // Copyright (C) 1998-2007 Marti Maria |
|
5 // |
|
6 // Permission is hereby granted, free of charge, to any person obtaining |
|
7 // a copy of this software and associated documentation files (the "Software"), |
|
8 // to deal in the Software without restriction, including without limitation |
|
9 // the rights to use, copy, modify, merge, publish, distribute, sublicense, |
|
10 // and/or sell copies of the Software, and to permit persons to whom the Software |
|
11 // is furnished to do so, subject to the following conditions: |
|
12 // |
|
13 // The above copyright notice and this permission notice shall be included in |
|
14 // all copies or substantial portions of the Software. |
|
15 // |
|
16 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|
17 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO |
|
18 // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
|
19 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
|
20 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
|
21 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
|
22 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
|
23 |
|
24 #include <stdlib.h> |
|
25 #include <math.h> |
|
26 #include <assert.h> |
|
27 #include <string.h> //memcpy |
|
28 #include "qcmsint.h" |
|
29 #include "chain.h" |
|
30 #include "matrix.h" |
|
31 #include "transform_util.h" |
|
32 |
|
33 /* for MSVC, GCC, Intel, and Sun compilers */ |
|
34 #if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_M_AMD64) || defined(__x86_64__) || defined(__x86_64) |
|
35 #define X86 |
|
36 #endif /* _M_IX86 || __i386__ || __i386 || _M_AMD64 || __x86_64__ || __x86_64 */ |
|
37 |
|
38 /** |
|
39 * AltiVec detection for PowerPC CPUs |
|
40 * In case we have a method of detecting do the runtime detection. |
|
41 * Otherwise statically choose the AltiVec path in case the compiler |
|
42 * was told to build with AltiVec support. |
|
43 */ |
|
44 #if (defined(__POWERPC__) || defined(__powerpc__)) |
|
45 #if defined(__linux__) |
|
46 #include <unistd.h> |
|
47 #include <fcntl.h> |
|
48 #include <stdio.h> |
|
49 #include <elf.h> |
|
50 #include <linux/auxvec.h> |
|
51 #include <asm/cputable.h> |
|
52 #include <link.h> |
|
53 |
|
54 static inline qcms_bool have_altivec() { |
|
55 static int available = -1; |
|
56 int new_avail = 0; |
|
57 ElfW(auxv_t) auxv; |
|
58 ssize_t count; |
|
59 int fd, i; |
|
60 |
|
61 if (available != -1) |
|
62 return (available != 0 ? true : false); |
|
63 |
|
64 fd = open("/proc/self/auxv", O_RDONLY); |
|
65 if (fd < 0) |
|
66 goto out; |
|
67 do { |
|
68 count = read(fd, &auxv, sizeof(auxv)); |
|
69 if (count < 0) |
|
70 goto out_close; |
|
71 |
|
72 if (auxv.a_type == AT_HWCAP) { |
|
73 new_avail = !!(auxv.a_un.a_val & PPC_FEATURE_HAS_ALTIVEC); |
|
74 goto out_close; |
|
75 } |
|
76 } while (auxv.a_type != AT_NULL); |
|
77 |
|
78 out_close: |
|
79 close(fd); |
|
80 out: |
|
81 available = new_avail; |
|
82 return (available != 0 ? true : false); |
|
83 } |
|
84 #elif defined(__APPLE__) && defined(__MACH__) |
|
85 #include <sys/sysctl.h> |
|
86 |
|
87 /** |
|
88 * rip-off from ffmpeg AltiVec detection code. |
|
89 * this code also appears on Apple's AltiVec pages. |
|
90 */ |
|
91 static inline qcms_bool have_altivec() { |
|
92 int sels[2] = {CTL_HW, HW_VECTORUNIT}; |
|
93 static int available = -1; |
|
94 size_t len = sizeof(available); |
|
95 int err; |
|
96 |
|
97 if (available != -1) |
|
98 return (available != 0 ? true : false); |
|
99 |
|
100 err = sysctl(sels, 2, &available, &len, NULL, 0); |
|
101 |
|
102 if (err == 0) |
|
103 if (available != 0) |
|
104 return true; |
|
105 |
|
106 return false; |
|
107 } |
|
108 #elif defined(__ALTIVEC__) || defined(__APPLE_ALTIVEC__) |
|
109 #define have_altivec() true |
|
110 #else |
|
111 #define have_altivec() false |
|
112 #endif |
|
113 #endif // (defined(__POWERPC__) || defined(__powerpc__)) |
|
114 |
|
115 // Build a White point, primary chromas transfer matrix from RGB to CIE XYZ |
|
116 // This is just an approximation, I am not handling all the non-linear |
|
117 // aspects of the RGB to XYZ process, and assumming that the gamma correction |
|
118 // has transitive property in the tranformation chain. |
|
119 // |
|
120 // the alghoritm: |
|
121 // |
|
122 // - First I build the absolute conversion matrix using |
|
123 // primaries in XYZ. This matrix is next inverted |
|
124 // - Then I eval the source white point across this matrix |
|
125 // obtaining the coeficients of the transformation |
|
126 // - Then, I apply these coeficients to the original matrix |
|
127 static struct matrix build_RGB_to_XYZ_transfer_matrix(qcms_CIE_xyY white, qcms_CIE_xyYTRIPLE primrs) |
|
128 { |
|
129 struct matrix primaries; |
|
130 struct matrix primaries_invert; |
|
131 struct matrix result; |
|
132 struct vector white_point; |
|
133 struct vector coefs; |
|
134 |
|
135 double xn, yn; |
|
136 double xr, yr; |
|
137 double xg, yg; |
|
138 double xb, yb; |
|
139 |
|
140 xn = white.x; |
|
141 yn = white.y; |
|
142 |
|
143 if (yn == 0.0) |
|
144 return matrix_invalid(); |
|
145 |
|
146 xr = primrs.red.x; |
|
147 yr = primrs.red.y; |
|
148 xg = primrs.green.x; |
|
149 yg = primrs.green.y; |
|
150 xb = primrs.blue.x; |
|
151 yb = primrs.blue.y; |
|
152 |
|
153 primaries.m[0][0] = xr; |
|
154 primaries.m[0][1] = xg; |
|
155 primaries.m[0][2] = xb; |
|
156 |
|
157 primaries.m[1][0] = yr; |
|
158 primaries.m[1][1] = yg; |
|
159 primaries.m[1][2] = yb; |
|
160 |
|
161 primaries.m[2][0] = 1 - xr - yr; |
|
162 primaries.m[2][1] = 1 - xg - yg; |
|
163 primaries.m[2][2] = 1 - xb - yb; |
|
164 primaries.invalid = false; |
|
165 |
|
166 white_point.v[0] = xn/yn; |
|
167 white_point.v[1] = 1.; |
|
168 white_point.v[2] = (1.0-xn-yn)/yn; |
|
169 |
|
170 primaries_invert = matrix_invert(primaries); |
|
171 |
|
172 coefs = matrix_eval(primaries_invert, white_point); |
|
173 |
|
174 result.m[0][0] = coefs.v[0]*xr; |
|
175 result.m[0][1] = coefs.v[1]*xg; |
|
176 result.m[0][2] = coefs.v[2]*xb; |
|
177 |
|
178 result.m[1][0] = coefs.v[0]*yr; |
|
179 result.m[1][1] = coefs.v[1]*yg; |
|
180 result.m[1][2] = coefs.v[2]*yb; |
|
181 |
|
182 result.m[2][0] = coefs.v[0]*(1.-xr-yr); |
|
183 result.m[2][1] = coefs.v[1]*(1.-xg-yg); |
|
184 result.m[2][2] = coefs.v[2]*(1.-xb-yb); |
|
185 result.invalid = primaries_invert.invalid; |
|
186 |
|
187 return result; |
|
188 } |
|
189 |
|
190 struct CIE_XYZ { |
|
191 double X; |
|
192 double Y; |
|
193 double Z; |
|
194 }; |
|
195 |
|
196 /* CIE Illuminant D50 */ |
|
197 static const struct CIE_XYZ D50_XYZ = { |
|
198 0.9642, |
|
199 1.0000, |
|
200 0.8249 |
|
201 }; |
|
202 |
|
203 /* from lcms: xyY2XYZ() |
|
204 * corresponds to argyll: icmYxy2XYZ() */ |
|
205 static struct CIE_XYZ xyY2XYZ(qcms_CIE_xyY source) |
|
206 { |
|
207 struct CIE_XYZ dest; |
|
208 dest.X = (source.x / source.y) * source.Y; |
|
209 dest.Y = source.Y; |
|
210 dest.Z = ((1 - source.x - source.y) / source.y) * source.Y; |
|
211 return dest; |
|
212 } |
|
213 |
|
214 /* from lcms: ComputeChromaticAdaption */ |
|
215 // Compute chromatic adaption matrix using chad as cone matrix |
|
216 static struct matrix |
|
217 compute_chromatic_adaption(struct CIE_XYZ source_white_point, |
|
218 struct CIE_XYZ dest_white_point, |
|
219 struct matrix chad) |
|
220 { |
|
221 struct matrix chad_inv; |
|
222 struct vector cone_source_XYZ, cone_source_rgb; |
|
223 struct vector cone_dest_XYZ, cone_dest_rgb; |
|
224 struct matrix cone, tmp; |
|
225 |
|
226 tmp = chad; |
|
227 chad_inv = matrix_invert(tmp); |
|
228 |
|
229 cone_source_XYZ.v[0] = source_white_point.X; |
|
230 cone_source_XYZ.v[1] = source_white_point.Y; |
|
231 cone_source_XYZ.v[2] = source_white_point.Z; |
|
232 |
|
233 cone_dest_XYZ.v[0] = dest_white_point.X; |
|
234 cone_dest_XYZ.v[1] = dest_white_point.Y; |
|
235 cone_dest_XYZ.v[2] = dest_white_point.Z; |
|
236 |
|
237 cone_source_rgb = matrix_eval(chad, cone_source_XYZ); |
|
238 cone_dest_rgb = matrix_eval(chad, cone_dest_XYZ); |
|
239 |
|
240 cone.m[0][0] = cone_dest_rgb.v[0]/cone_source_rgb.v[0]; |
|
241 cone.m[0][1] = 0; |
|
242 cone.m[0][2] = 0; |
|
243 cone.m[1][0] = 0; |
|
244 cone.m[1][1] = cone_dest_rgb.v[1]/cone_source_rgb.v[1]; |
|
245 cone.m[1][2] = 0; |
|
246 cone.m[2][0] = 0; |
|
247 cone.m[2][1] = 0; |
|
248 cone.m[2][2] = cone_dest_rgb.v[2]/cone_source_rgb.v[2]; |
|
249 cone.invalid = false; |
|
250 |
|
251 // Normalize |
|
252 return matrix_multiply(chad_inv, matrix_multiply(cone, chad)); |
|
253 } |
|
254 |
|
255 /* from lcms: cmsAdaptionMatrix */ |
|
256 // Returns the final chrmatic adaptation from illuminant FromIll to Illuminant ToIll |
|
257 // Bradford is assumed |
|
258 static struct matrix |
|
259 adaption_matrix(struct CIE_XYZ source_illumination, struct CIE_XYZ target_illumination) |
|
260 { |
|
261 struct matrix lam_rigg = {{ // Bradford matrix |
|
262 { 0.8951, 0.2664, -0.1614 }, |
|
263 { -0.7502, 1.7135, 0.0367 }, |
|
264 { 0.0389, -0.0685, 1.0296 } |
|
265 }}; |
|
266 return compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg); |
|
267 } |
|
268 |
|
269 /* from lcms: cmsAdaptMatrixToD50 */ |
|
270 static struct matrix adapt_matrix_to_D50(struct matrix r, qcms_CIE_xyY source_white_pt) |
|
271 { |
|
272 struct CIE_XYZ Dn; |
|
273 struct matrix Bradford; |
|
274 |
|
275 if (source_white_pt.y == 0.0) |
|
276 return matrix_invalid(); |
|
277 |
|
278 Dn = xyY2XYZ(source_white_pt); |
|
279 |
|
280 Bradford = adaption_matrix(Dn, D50_XYZ); |
|
281 return matrix_multiply(Bradford, r); |
|
282 } |
|
283 |
|
284 qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcms_CIE_xyYTRIPLE primaries) |
|
285 { |
|
286 struct matrix colorants; |
|
287 colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries); |
|
288 colorants = adapt_matrix_to_D50(colorants, white_point); |
|
289 |
|
290 if (colorants.invalid) |
|
291 return false; |
|
292 |
|
293 /* note: there's a transpose type of operation going on here */ |
|
294 profile->redColorant.X = double_to_s15Fixed16Number(colorants.m[0][0]); |
|
295 profile->redColorant.Y = double_to_s15Fixed16Number(colorants.m[1][0]); |
|
296 profile->redColorant.Z = double_to_s15Fixed16Number(colorants.m[2][0]); |
|
297 |
|
298 profile->greenColorant.X = double_to_s15Fixed16Number(colorants.m[0][1]); |
|
299 profile->greenColorant.Y = double_to_s15Fixed16Number(colorants.m[1][1]); |
|
300 profile->greenColorant.Z = double_to_s15Fixed16Number(colorants.m[2][1]); |
|
301 |
|
302 profile->blueColorant.X = double_to_s15Fixed16Number(colorants.m[0][2]); |
|
303 profile->blueColorant.Y = double_to_s15Fixed16Number(colorants.m[1][2]); |
|
304 profile->blueColorant.Z = double_to_s15Fixed16Number(colorants.m[2][2]); |
|
305 |
|
306 return true; |
|
307 } |
|
308 |
|
309 qcms_bool get_rgb_colorants(struct matrix *colorants, qcms_CIE_xyY white_point, qcms_CIE_xyYTRIPLE primaries) |
|
310 { |
|
311 *colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries); |
|
312 *colorants = adapt_matrix_to_D50(*colorants, white_point); |
|
313 |
|
314 return (colorants->invalid ? true : false); |
|
315 } |
|
316 |
|
317 #if 0 |
|
318 static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
|
319 { |
|
320 int i; |
|
321 float (*mat)[4] = transform->matrix; |
|
322 for (i=0; i<length; i++) { |
|
323 unsigned char device_r = *src++; |
|
324 unsigned char device_g = *src++; |
|
325 unsigned char device_b = *src++; |
|
326 |
|
327 float linear_r = transform->input_gamma_table_r[device_r]; |
|
328 float linear_g = transform->input_gamma_table_g[device_g]; |
|
329 float linear_b = transform->input_gamma_table_b[device_b]; |
|
330 |
|
331 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; |
|
332 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; |
|
333 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; |
|
334 |
|
335 float out_device_r = pow(out_linear_r, transform->out_gamma_r); |
|
336 float out_device_g = pow(out_linear_g, transform->out_gamma_g); |
|
337 float out_device_b = pow(out_linear_b, transform->out_gamma_b); |
|
338 |
|
339 dest[OUTPUT_R_INDEX] = clamp_u8(255*out_device_r); |
|
340 dest[OUTPUT_G_INDEX] = clamp_u8(255*out_device_g); |
|
341 dest[OUTPUT_B_INDEX] = clamp_u8(255*out_device_b); |
|
342 dest += RGB_OUTPUT_COMPONENTS; |
|
343 } |
|
344 } |
|
345 #endif |
|
346 |
|
347 static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
|
348 { |
|
349 unsigned int i; |
|
350 for (i = 0; i < length; i++) { |
|
351 float out_device_r, out_device_g, out_device_b; |
|
352 unsigned char device = *src++; |
|
353 |
|
354 float linear = transform->input_gamma_table_gray[device]; |
|
355 |
|
356 out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length); |
|
357 out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length); |
|
358 out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length); |
|
359 |
|
360 dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255); |
|
361 dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255); |
|
362 dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255); |
|
363 dest += RGB_OUTPUT_COMPONENTS; |
|
364 } |
|
365 } |
|
366 |
|
367 /* Alpha is not corrected. |
|
368 A rationale for this is found in Alvy Ray's "Should Alpha Be Nonlinear If |
|
369 RGB Is?" Tech Memo 17 (December 14, 1998). |
|
370 See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf |
|
371 */ |
|
372 |
|
373 static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
|
374 { |
|
375 unsigned int i; |
|
376 for (i = 0; i < length; i++) { |
|
377 float out_device_r, out_device_g, out_device_b; |
|
378 unsigned char device = *src++; |
|
379 unsigned char alpha = *src++; |
|
380 |
|
381 float linear = transform->input_gamma_table_gray[device]; |
|
382 |
|
383 out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length); |
|
384 out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length); |
|
385 out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length); |
|
386 |
|
387 dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255); |
|
388 dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255); |
|
389 dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255); |
|
390 dest[OUTPUT_A_INDEX] = alpha; |
|
391 dest += RGBA_OUTPUT_COMPONENTS; |
|
392 } |
|
393 } |
|
394 |
|
395 |
|
396 static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
|
397 { |
|
398 unsigned int i; |
|
399 for (i = 0; i < length; i++) { |
|
400 unsigned char device = *src++; |
|
401 uint16_t gray; |
|
402 |
|
403 float linear = transform->input_gamma_table_gray[device]; |
|
404 |
|
405 /* we could round here... */ |
|
406 gray = linear * PRECACHE_OUTPUT_MAX; |
|
407 |
|
408 dest[OUTPUT_R_INDEX] = transform->output_table_r->data[gray]; |
|
409 dest[OUTPUT_G_INDEX] = transform->output_table_g->data[gray]; |
|
410 dest[OUTPUT_B_INDEX] = transform->output_table_b->data[gray]; |
|
411 dest += RGB_OUTPUT_COMPONENTS; |
|
412 } |
|
413 } |
|
414 |
|
415 static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
|
416 { |
|
417 unsigned int i; |
|
418 for (i = 0; i < length; i++) { |
|
419 unsigned char device = *src++; |
|
420 unsigned char alpha = *src++; |
|
421 uint16_t gray; |
|
422 |
|
423 float linear = transform->input_gamma_table_gray[device]; |
|
424 |
|
425 /* we could round here... */ |
|
426 gray = linear * PRECACHE_OUTPUT_MAX; |
|
427 |
|
428 dest[OUTPUT_R_INDEX] = transform->output_table_r->data[gray]; |
|
429 dest[OUTPUT_G_INDEX] = transform->output_table_g->data[gray]; |
|
430 dest[OUTPUT_B_INDEX] = transform->output_table_b->data[gray]; |
|
431 dest[OUTPUT_A_INDEX] = alpha; |
|
432 dest += RGBA_OUTPUT_COMPONENTS; |
|
433 } |
|
434 } |
|
435 |
|
436 static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
|
437 { |
|
438 unsigned int i; |
|
439 float (*mat)[4] = transform->matrix; |
|
440 for (i = 0; i < length; i++) { |
|
441 unsigned char device_r = *src++; |
|
442 unsigned char device_g = *src++; |
|
443 unsigned char device_b = *src++; |
|
444 uint16_t r, g, b; |
|
445 |
|
446 float linear_r = transform->input_gamma_table_r[device_r]; |
|
447 float linear_g = transform->input_gamma_table_g[device_g]; |
|
448 float linear_b = transform->input_gamma_table_b[device_b]; |
|
449 |
|
450 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; |
|
451 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; |
|
452 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; |
|
453 |
|
454 out_linear_r = clamp_float(out_linear_r); |
|
455 out_linear_g = clamp_float(out_linear_g); |
|
456 out_linear_b = clamp_float(out_linear_b); |
|
457 |
|
458 /* we could round here... */ |
|
459 r = out_linear_r * PRECACHE_OUTPUT_MAX; |
|
460 g = out_linear_g * PRECACHE_OUTPUT_MAX; |
|
461 b = out_linear_b * PRECACHE_OUTPUT_MAX; |
|
462 |
|
463 dest[OUTPUT_R_INDEX] = transform->output_table_r->data[r]; |
|
464 dest[OUTPUT_G_INDEX] = transform->output_table_g->data[g]; |
|
465 dest[OUTPUT_B_INDEX] = transform->output_table_b->data[b]; |
|
466 dest += RGB_OUTPUT_COMPONENTS; |
|
467 } |
|
468 } |
|
469 |
|
470 static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
|
471 { |
|
472 unsigned int i; |
|
473 float (*mat)[4] = transform->matrix; |
|
474 for (i = 0; i < length; i++) { |
|
475 unsigned char device_r = *src++; |
|
476 unsigned char device_g = *src++; |
|
477 unsigned char device_b = *src++; |
|
478 unsigned char alpha = *src++; |
|
479 uint16_t r, g, b; |
|
480 |
|
481 float linear_r = transform->input_gamma_table_r[device_r]; |
|
482 float linear_g = transform->input_gamma_table_g[device_g]; |
|
483 float linear_b = transform->input_gamma_table_b[device_b]; |
|
484 |
|
485 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; |
|
486 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; |
|
487 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; |
|
488 |
|
489 out_linear_r = clamp_float(out_linear_r); |
|
490 out_linear_g = clamp_float(out_linear_g); |
|
491 out_linear_b = clamp_float(out_linear_b); |
|
492 |
|
493 /* we could round here... */ |
|
494 r = out_linear_r * PRECACHE_OUTPUT_MAX; |
|
495 g = out_linear_g * PRECACHE_OUTPUT_MAX; |
|
496 b = out_linear_b * PRECACHE_OUTPUT_MAX; |
|
497 |
|
498 dest[OUTPUT_R_INDEX] = transform->output_table_r->data[r]; |
|
499 dest[OUTPUT_G_INDEX] = transform->output_table_g->data[g]; |
|
500 dest[OUTPUT_B_INDEX] = transform->output_table_b->data[b]; |
|
501 dest[OUTPUT_A_INDEX] = alpha; |
|
502 dest += RGBA_OUTPUT_COMPONENTS; |
|
503 } |
|
504 } |
|
505 |
|
506 // Not used |
|
507 /* |
|
508 static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) { |
|
509 unsigned int i; |
|
510 int xy_len = 1; |
|
511 int x_len = transform->grid_size; |
|
512 int len = x_len * x_len; |
|
513 float* r_table = transform->r_clut; |
|
514 float* g_table = transform->g_clut; |
|
515 float* b_table = transform->b_clut; |
|
516 |
|
517 for (i = 0; i < length; i++) { |
|
518 unsigned char in_r = *src++; |
|
519 unsigned char in_g = *src++; |
|
520 unsigned char in_b = *src++; |
|
521 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f; |
|
522 |
|
523 int x = floorf(linear_r * (transform->grid_size-1)); |
|
524 int y = floorf(linear_g * (transform->grid_size-1)); |
|
525 int z = floorf(linear_b * (transform->grid_size-1)); |
|
526 int x_n = ceilf(linear_r * (transform->grid_size-1)); |
|
527 int y_n = ceilf(linear_g * (transform->grid_size-1)); |
|
528 int z_n = ceilf(linear_b * (transform->grid_size-1)); |
|
529 float x_d = linear_r * (transform->grid_size-1) - x; |
|
530 float y_d = linear_g * (transform->grid_size-1) - y; |
|
531 float z_d = linear_b * (transform->grid_size-1) - z; |
|
532 |
|
533 float r_x1 = lerp(CLU(r_table,x,y,z), CLU(r_table,x_n,y,z), x_d); |
|
534 float r_x2 = lerp(CLU(r_table,x,y_n,z), CLU(r_table,x_n,y_n,z), x_d); |
|
535 float r_y1 = lerp(r_x1, r_x2, y_d); |
|
536 float r_x3 = lerp(CLU(r_table,x,y,z_n), CLU(r_table,x_n,y,z_n), x_d); |
|
537 float r_x4 = lerp(CLU(r_table,x,y_n,z_n), CLU(r_table,x_n,y_n,z_n), x_d); |
|
538 float r_y2 = lerp(r_x3, r_x4, y_d); |
|
539 float clut_r = lerp(r_y1, r_y2, z_d); |
|
540 |
|
541 float g_x1 = lerp(CLU(g_table,x,y,z), CLU(g_table,x_n,y,z), x_d); |
|
542 float g_x2 = lerp(CLU(g_table,x,y_n,z), CLU(g_table,x_n,y_n,z), x_d); |
|
543 float g_y1 = lerp(g_x1, g_x2, y_d); |
|
544 float g_x3 = lerp(CLU(g_table,x,y,z_n), CLU(g_table,x_n,y,z_n), x_d); |
|
545 float g_x4 = lerp(CLU(g_table,x,y_n,z_n), CLU(g_table,x_n,y_n,z_n), x_d); |
|
546 float g_y2 = lerp(g_x3, g_x4, y_d); |
|
547 float clut_g = lerp(g_y1, g_y2, z_d); |
|
548 |
|
549 float b_x1 = lerp(CLU(b_table,x,y,z), CLU(b_table,x_n,y,z), x_d); |
|
550 float b_x2 = lerp(CLU(b_table,x,y_n,z), CLU(b_table,x_n,y_n,z), x_d); |
|
551 float b_y1 = lerp(b_x1, b_x2, y_d); |
|
552 float b_x3 = lerp(CLU(b_table,x,y,z_n), CLU(b_table,x_n,y,z_n), x_d); |
|
553 float b_x4 = lerp(CLU(b_table,x,y_n,z_n), CLU(b_table,x_n,y_n,z_n), x_d); |
|
554 float b_y2 = lerp(b_x3, b_x4, y_d); |
|
555 float clut_b = lerp(b_y1, b_y2, z_d); |
|
556 |
|
557 *dest++ = clamp_u8(clut_r*255.0f); |
|
558 *dest++ = clamp_u8(clut_g*255.0f); |
|
559 *dest++ = clamp_u8(clut_b*255.0f); |
|
560 } |
|
561 } |
|
562 */ |
|
563 |
|
564 static int int_div_ceil(int value, int div) { |
|
565 return ((value + div - 1) / div); |
|
566 } |
|
567 |
|
568 // Using lcms' tetra interpolation algorithm. |
|
569 static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) { |
|
570 unsigned int i; |
|
571 int xy_len = 1; |
|
572 int x_len = transform->grid_size; |
|
573 int len = x_len * x_len; |
|
574 float* r_table = transform->r_clut; |
|
575 float* g_table = transform->g_clut; |
|
576 float* b_table = transform->b_clut; |
|
577 float c0_r, c1_r, c2_r, c3_r; |
|
578 float c0_g, c1_g, c2_g, c3_g; |
|
579 float c0_b, c1_b, c2_b, c3_b; |
|
580 float clut_r, clut_g, clut_b; |
|
581 for (i = 0; i < length; i++) { |
|
582 unsigned char in_r = *src++; |
|
583 unsigned char in_g = *src++; |
|
584 unsigned char in_b = *src++; |
|
585 unsigned char in_a = *src++; |
|
586 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f; |
|
587 |
|
588 int x = in_r * (transform->grid_size-1) / 255; |
|
589 int y = in_g * (transform->grid_size-1) / 255; |
|
590 int z = in_b * (transform->grid_size-1) / 255; |
|
591 int x_n = int_div_ceil(in_r * (transform->grid_size-1), 255); |
|
592 int y_n = int_div_ceil(in_g * (transform->grid_size-1), 255); |
|
593 int z_n = int_div_ceil(in_b * (transform->grid_size-1), 255); |
|
594 float rx = linear_r * (transform->grid_size-1) - x; |
|
595 float ry = linear_g * (transform->grid_size-1) - y; |
|
596 float rz = linear_b * (transform->grid_size-1) - z; |
|
597 |
|
598 c0_r = CLU(r_table, x, y, z); |
|
599 c0_g = CLU(g_table, x, y, z); |
|
600 c0_b = CLU(b_table, x, y, z); |
|
601 |
|
602 if( rx >= ry ) { |
|
603 if (ry >= rz) { //rx >= ry && ry >= rz |
|
604 c1_r = CLU(r_table, x_n, y, z) - c0_r; |
|
605 c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z); |
|
606 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); |
|
607 c1_g = CLU(g_table, x_n, y, z) - c0_g; |
|
608 c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z); |
|
609 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); |
|
610 c1_b = CLU(b_table, x_n, y, z) - c0_b; |
|
611 c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z); |
|
612 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); |
|
613 } else { |
|
614 if (rx >= rz) { //rx >= rz && rz >= ry |
|
615 c1_r = CLU(r_table, x_n, y, z) - c0_r; |
|
616 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); |
|
617 c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z); |
|
618 c1_g = CLU(g_table, x_n, y, z) - c0_g; |
|
619 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); |
|
620 c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z); |
|
621 c1_b = CLU(b_table, x_n, y, z) - c0_b; |
|
622 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); |
|
623 c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z); |
|
624 } else { //rz > rx && rx >= ry |
|
625 c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n); |
|
626 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); |
|
627 c3_r = CLU(r_table, x, y, z_n) - c0_r; |
|
628 c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n); |
|
629 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); |
|
630 c3_g = CLU(g_table, x, y, z_n) - c0_g; |
|
631 c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n); |
|
632 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); |
|
633 c3_b = CLU(b_table, x, y, z_n) - c0_b; |
|
634 } |
|
635 } |
|
636 } else { |
|
637 if (rx >= rz) { //ry > rx && rx >= rz |
|
638 c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z); |
|
639 c2_r = CLU(r_table, x, y_n, z) - c0_r; |
|
640 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); |
|
641 c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z); |
|
642 c2_g = CLU(g_table, x, y_n, z) - c0_g; |
|
643 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); |
|
644 c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z); |
|
645 c2_b = CLU(b_table, x, y_n, z) - c0_b; |
|
646 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); |
|
647 } else { |
|
648 if (ry >= rz) { //ry >= rz && rz > rx |
|
649 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); |
|
650 c2_r = CLU(r_table, x, y_n, z) - c0_r; |
|
651 c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z); |
|
652 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); |
|
653 c2_g = CLU(g_table, x, y_n, z) - c0_g; |
|
654 c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z); |
|
655 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); |
|
656 c2_b = CLU(b_table, x, y_n, z) - c0_b; |
|
657 c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z); |
|
658 } else { //rz > ry && ry > rx |
|
659 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); |
|
660 c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n); |
|
661 c3_r = CLU(r_table, x, y, z_n) - c0_r; |
|
662 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); |
|
663 c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n); |
|
664 c3_g = CLU(g_table, x, y, z_n) - c0_g; |
|
665 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); |
|
666 c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n); |
|
667 c3_b = CLU(b_table, x, y, z_n) - c0_b; |
|
668 } |
|
669 } |
|
670 } |
|
671 |
|
672 clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz; |
|
673 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz; |
|
674 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz; |
|
675 |
|
676 dest[OUTPUT_R_INDEX] = clamp_u8(clut_r*255.0f); |
|
677 dest[OUTPUT_G_INDEX] = clamp_u8(clut_g*255.0f); |
|
678 dest[OUTPUT_B_INDEX] = clamp_u8(clut_b*255.0f); |
|
679 dest[OUTPUT_A_INDEX] = in_a; |
|
680 dest += RGBA_OUTPUT_COMPONENTS; |
|
681 } |
|
682 } |
|
683 |
|
684 // Using lcms' tetra interpolation code. |
|
685 static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) { |
|
686 unsigned int i; |
|
687 int xy_len = 1; |
|
688 int x_len = transform->grid_size; |
|
689 int len = x_len * x_len; |
|
690 float* r_table = transform->r_clut; |
|
691 float* g_table = transform->g_clut; |
|
692 float* b_table = transform->b_clut; |
|
693 float c0_r, c1_r, c2_r, c3_r; |
|
694 float c0_g, c1_g, c2_g, c3_g; |
|
695 float c0_b, c1_b, c2_b, c3_b; |
|
696 float clut_r, clut_g, clut_b; |
|
697 for (i = 0; i < length; i++) { |
|
698 unsigned char in_r = *src++; |
|
699 unsigned char in_g = *src++; |
|
700 unsigned char in_b = *src++; |
|
701 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f; |
|
702 |
|
703 int x = in_r * (transform->grid_size-1) / 255; |
|
704 int y = in_g * (transform->grid_size-1) / 255; |
|
705 int z = in_b * (transform->grid_size-1) / 255; |
|
706 int x_n = int_div_ceil(in_r * (transform->grid_size-1), 255); |
|
707 int y_n = int_div_ceil(in_g * (transform->grid_size-1), 255); |
|
708 int z_n = int_div_ceil(in_b * (transform->grid_size-1), 255); |
|
709 float rx = linear_r * (transform->grid_size-1) - x; |
|
710 float ry = linear_g * (transform->grid_size-1) - y; |
|
711 float rz = linear_b * (transform->grid_size-1) - z; |
|
712 |
|
713 c0_r = CLU(r_table, x, y, z); |
|
714 c0_g = CLU(g_table, x, y, z); |
|
715 c0_b = CLU(b_table, x, y, z); |
|
716 |
|
717 if( rx >= ry ) { |
|
718 if (ry >= rz) { //rx >= ry && ry >= rz |
|
719 c1_r = CLU(r_table, x_n, y, z) - c0_r; |
|
720 c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z); |
|
721 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); |
|
722 c1_g = CLU(g_table, x_n, y, z) - c0_g; |
|
723 c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z); |
|
724 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); |
|
725 c1_b = CLU(b_table, x_n, y, z) - c0_b; |
|
726 c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z); |
|
727 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); |
|
728 } else { |
|
729 if (rx >= rz) { //rx >= rz && rz >= ry |
|
730 c1_r = CLU(r_table, x_n, y, z) - c0_r; |
|
731 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); |
|
732 c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z); |
|
733 c1_g = CLU(g_table, x_n, y, z) - c0_g; |
|
734 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); |
|
735 c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z); |
|
736 c1_b = CLU(b_table, x_n, y, z) - c0_b; |
|
737 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); |
|
738 c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z); |
|
739 } else { //rz > rx && rx >= ry |
|
740 c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n); |
|
741 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); |
|
742 c3_r = CLU(r_table, x, y, z_n) - c0_r; |
|
743 c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n); |
|
744 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); |
|
745 c3_g = CLU(g_table, x, y, z_n) - c0_g; |
|
746 c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n); |
|
747 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); |
|
748 c3_b = CLU(b_table, x, y, z_n) - c0_b; |
|
749 } |
|
750 } |
|
751 } else { |
|
752 if (rx >= rz) { //ry > rx && rx >= rz |
|
753 c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z); |
|
754 c2_r = CLU(r_table, x, y_n, z) - c0_r; |
|
755 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); |
|
756 c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z); |
|
757 c2_g = CLU(g_table, x, y_n, z) - c0_g; |
|
758 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); |
|
759 c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z); |
|
760 c2_b = CLU(b_table, x, y_n, z) - c0_b; |
|
761 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); |
|
762 } else { |
|
763 if (ry >= rz) { //ry >= rz && rz > rx |
|
764 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); |
|
765 c2_r = CLU(r_table, x, y_n, z) - c0_r; |
|
766 c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z); |
|
767 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); |
|
768 c2_g = CLU(g_table, x, y_n, z) - c0_g; |
|
769 c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z); |
|
770 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); |
|
771 c2_b = CLU(b_table, x, y_n, z) - c0_b; |
|
772 c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z); |
|
773 } else { //rz > ry && ry > rx |
|
774 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); |
|
775 c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n); |
|
776 c3_r = CLU(r_table, x, y, z_n) - c0_r; |
|
777 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); |
|
778 c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n); |
|
779 c3_g = CLU(g_table, x, y, z_n) - c0_g; |
|
780 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); |
|
781 c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n); |
|
782 c3_b = CLU(b_table, x, y, z_n) - c0_b; |
|
783 } |
|
784 } |
|
785 } |
|
786 |
|
787 clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz; |
|
788 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz; |
|
789 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz; |
|
790 |
|
791 dest[OUTPUT_R_INDEX] = clamp_u8(clut_r*255.0f); |
|
792 dest[OUTPUT_G_INDEX] = clamp_u8(clut_g*255.0f); |
|
793 dest[OUTPUT_B_INDEX] = clamp_u8(clut_b*255.0f); |
|
794 dest += RGB_OUTPUT_COMPONENTS; |
|
795 } |
|
796 } |
|
797 |
|
798 static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
|
799 { |
|
800 unsigned int i; |
|
801 float (*mat)[4] = transform->matrix; |
|
802 for (i = 0; i < length; i++) { |
|
803 unsigned char device_r = *src++; |
|
804 unsigned char device_g = *src++; |
|
805 unsigned char device_b = *src++; |
|
806 float out_device_r, out_device_g, out_device_b; |
|
807 |
|
808 float linear_r = transform->input_gamma_table_r[device_r]; |
|
809 float linear_g = transform->input_gamma_table_g[device_g]; |
|
810 float linear_b = transform->input_gamma_table_b[device_b]; |
|
811 |
|
812 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; |
|
813 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; |
|
814 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; |
|
815 |
|
816 out_linear_r = clamp_float(out_linear_r); |
|
817 out_linear_g = clamp_float(out_linear_g); |
|
818 out_linear_b = clamp_float(out_linear_b); |
|
819 |
|
820 out_device_r = lut_interp_linear(out_linear_r, |
|
821 transform->output_gamma_lut_r, transform->output_gamma_lut_r_length); |
|
822 out_device_g = lut_interp_linear(out_linear_g, |
|
823 transform->output_gamma_lut_g, transform->output_gamma_lut_g_length); |
|
824 out_device_b = lut_interp_linear(out_linear_b, |
|
825 transform->output_gamma_lut_b, transform->output_gamma_lut_b_length); |
|
826 |
|
827 dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255); |
|
828 dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255); |
|
829 dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255); |
|
830 dest += RGB_OUTPUT_COMPONENTS; |
|
831 } |
|
832 } |
|
833 |
|
834 static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
|
835 { |
|
836 unsigned int i; |
|
837 float (*mat)[4] = transform->matrix; |
|
838 for (i = 0; i < length; i++) { |
|
839 unsigned char device_r = *src++; |
|
840 unsigned char device_g = *src++; |
|
841 unsigned char device_b = *src++; |
|
842 unsigned char alpha = *src++; |
|
843 float out_device_r, out_device_g, out_device_b; |
|
844 |
|
845 float linear_r = transform->input_gamma_table_r[device_r]; |
|
846 float linear_g = transform->input_gamma_table_g[device_g]; |
|
847 float linear_b = transform->input_gamma_table_b[device_b]; |
|
848 |
|
849 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; |
|
850 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; |
|
851 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; |
|
852 |
|
853 out_linear_r = clamp_float(out_linear_r); |
|
854 out_linear_g = clamp_float(out_linear_g); |
|
855 out_linear_b = clamp_float(out_linear_b); |
|
856 |
|
857 out_device_r = lut_interp_linear(out_linear_r, |
|
858 transform->output_gamma_lut_r, transform->output_gamma_lut_r_length); |
|
859 out_device_g = lut_interp_linear(out_linear_g, |
|
860 transform->output_gamma_lut_g, transform->output_gamma_lut_g_length); |
|
861 out_device_b = lut_interp_linear(out_linear_b, |
|
862 transform->output_gamma_lut_b, transform->output_gamma_lut_b_length); |
|
863 |
|
864 dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255); |
|
865 dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255); |
|
866 dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255); |
|
867 dest[OUTPUT_A_INDEX] = alpha; |
|
868 dest += RGBA_OUTPUT_COMPONENTS; |
|
869 } |
|
870 } |
|
871 |
|
872 #if 0 |
|
873 static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
|
874 { |
|
875 int i; |
|
876 float (*mat)[4] = transform->matrix; |
|
877 for (i = 0; i < length; i++) { |
|
878 unsigned char device_r = *src++; |
|
879 unsigned char device_g = *src++; |
|
880 unsigned char device_b = *src++; |
|
881 |
|
882 float linear_r = transform->input_gamma_table_r[device_r]; |
|
883 float linear_g = transform->input_gamma_table_g[device_g]; |
|
884 float linear_b = transform->input_gamma_table_b[device_b]; |
|
885 |
|
886 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; |
|
887 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; |
|
888 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; |
|
889 |
|
890 *dest++ = clamp_u8(out_linear_r*255); |
|
891 *dest++ = clamp_u8(out_linear_g*255); |
|
892 *dest++ = clamp_u8(out_linear_b*255); |
|
893 } |
|
894 } |
|
895 #endif |
|
896 |
|
897 /* |
|
898 * If users create and destroy objects on different threads, even if the same |
|
899 * objects aren't used on different threads at the same time, we can still run |
|
900 * in to trouble with refcounts if they aren't atomic. |
|
901 * |
|
902 * This can lead to us prematurely deleting the precache if threads get unlucky |
|
903 * and write the wrong value to the ref count. |
|
904 */ |
|
905 static struct precache_output *precache_reference(struct precache_output *p) |
|
906 { |
|
907 qcms_atomic_increment(p->ref_count); |
|
908 return p; |
|
909 } |
|
910 |
|
911 static struct precache_output *precache_create() |
|
912 { |
|
913 struct precache_output *p = malloc(sizeof(struct precache_output)); |
|
914 if (p) |
|
915 p->ref_count = 1; |
|
916 return p; |
|
917 } |
|
918 |
|
919 void precache_release(struct precache_output *p) |
|
920 { |
|
921 if (qcms_atomic_decrement(p->ref_count) == 0) { |
|
922 free(p); |
|
923 } |
|
924 } |
|
925 |
|
926 #ifdef HAS_POSIX_MEMALIGN |
|
927 static qcms_transform *transform_alloc(void) |
|
928 { |
|
929 qcms_transform *t; |
|
930 if (!posix_memalign(&t, 16, sizeof(*t))) { |
|
931 return t; |
|
932 } else { |
|
933 return NULL; |
|
934 } |
|
935 } |
|
936 static void transform_free(qcms_transform *t) |
|
937 { |
|
938 free(t); |
|
939 } |
|
940 #else |
|
941 static qcms_transform *transform_alloc(void) |
|
942 { |
|
943 /* transform needs to be aligned on a 16byte boundrary */ |
|
944 char *original_block = calloc(sizeof(qcms_transform) + sizeof(void*) + 16, 1); |
|
945 /* make room for a pointer to the block returned by calloc */ |
|
946 void *transform_start = original_block + sizeof(void*); |
|
947 /* align transform_start */ |
|
948 qcms_transform *transform_aligned = (qcms_transform*)(((uintptr_t)transform_start + 15) & ~0xf); |
|
949 |
|
950 /* store a pointer to the block returned by calloc so that we can free it later */ |
|
951 void **(original_block_ptr) = (void**)transform_aligned; |
|
952 if (!original_block) |
|
953 return NULL; |
|
954 original_block_ptr--; |
|
955 *original_block_ptr = original_block; |
|
956 |
|
957 return transform_aligned; |
|
958 } |
|
959 static void transform_free(qcms_transform *t) |
|
960 { |
|
961 /* get at the pointer to the unaligned block returned by calloc */ |
|
962 void **p = (void**)t; |
|
963 p--; |
|
964 free(*p); |
|
965 } |
|
966 #endif |
|
967 |
|
968 void qcms_transform_release(qcms_transform *t) |
|
969 { |
|
970 /* ensure we only free the gamma tables once even if there are |
|
971 * multiple references to the same data */ |
|
972 |
|
973 if (t->output_table_r) |
|
974 precache_release(t->output_table_r); |
|
975 if (t->output_table_g) |
|
976 precache_release(t->output_table_g); |
|
977 if (t->output_table_b) |
|
978 precache_release(t->output_table_b); |
|
979 |
|
980 free(t->input_gamma_table_r); |
|
981 if (t->input_gamma_table_g != t->input_gamma_table_r) |
|
982 free(t->input_gamma_table_g); |
|
983 if (t->input_gamma_table_g != t->input_gamma_table_r && |
|
984 t->input_gamma_table_g != t->input_gamma_table_b) |
|
985 free(t->input_gamma_table_b); |
|
986 |
|
987 free(t->input_gamma_table_gray); |
|
988 |
|
989 free(t->output_gamma_lut_r); |
|
990 free(t->output_gamma_lut_g); |
|
991 free(t->output_gamma_lut_b); |
|
992 |
|
993 transform_free(t); |
|
994 } |
|
995 |
|
996 #ifdef X86 |
|
997 // Determine if we can build with SSE2 (this was partly copied from jmorecfg.h in |
|
998 // mozilla/jpeg) |
|
999 // ------------------------------------------------------------------------- |
|
1000 #if defined(_M_IX86) && defined(_MSC_VER) |
|
1001 #define HAS_CPUID |
|
1002 /* Get us a CPUID function. Avoid clobbering EBX because sometimes it's the PIC |
|
1003 register - I'm not sure if that ever happens on windows, but cpuid isn't |
|
1004 on the critical path so we just preserve the register to be safe and to be |
|
1005 consistent with the non-windows version. */ |
|
1006 static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) { |
|
1007 uint32_t a_, b_, c_, d_; |
|
1008 __asm { |
|
1009 xchg ebx, esi |
|
1010 mov eax, fxn |
|
1011 cpuid |
|
1012 mov a_, eax |
|
1013 mov b_, ebx |
|
1014 mov c_, ecx |
|
1015 mov d_, edx |
|
1016 xchg ebx, esi |
|
1017 } |
|
1018 *a = a_; |
|
1019 *b = b_; |
|
1020 *c = c_; |
|
1021 *d = d_; |
|
1022 } |
|
1023 #elif (defined(__GNUC__) || defined(__SUNPRO_C)) && (defined(__i386__) || defined(__i386)) |
|
1024 #define HAS_CPUID |
|
1025 /* Get us a CPUID function. We can't use ebx because it's the PIC register on |
|
1026 some platforms, so we use ESI instead and save ebx to avoid clobbering it. */ |
|
1027 static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) { |
|
1028 |
|
1029 uint32_t a_, b_, c_, d_; |
|
1030 __asm__ __volatile__ ("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi;" |
|
1031 : "=a" (a_), "=S" (b_), "=c" (c_), "=d" (d_) : "a" (fxn)); |
|
1032 *a = a_; |
|
1033 *b = b_; |
|
1034 *c = c_; |
|
1035 *d = d_; |
|
1036 } |
|
1037 #endif |
|
1038 |
|
1039 // -------------------------Runtime SSEx Detection----------------------------- |
|
1040 |
|
1041 /* MMX is always supported per |
|
1042 * Gecko v1.9.1 minimum CPU requirements */ |
|
1043 #define SSE1_EDX_MASK (1UL << 25) |
|
1044 #define SSE2_EDX_MASK (1UL << 26) |
|
1045 #define SSE3_ECX_MASK (1UL << 0) |
|
1046 |
|
1047 static int sse_version_available(void) |
|
1048 { |
|
1049 #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) |
|
1050 /* we know at build time that 64-bit CPUs always have SSE2 |
|
1051 * this tells the compiler that non-SSE2 branches will never be |
|
1052 * taken (i.e. OK to optimze away the SSE1 and non-SIMD code */ |
|
1053 return 2; |
|
1054 #elif defined(HAS_CPUID) |
|
1055 static int sse_version = -1; |
|
1056 uint32_t a, b, c, d; |
|
1057 uint32_t function = 0x00000001; |
|
1058 |
|
1059 if (sse_version == -1) { |
|
1060 sse_version = 0; |
|
1061 cpuid(function, &a, &b, &c, &d); |
|
1062 if (c & SSE3_ECX_MASK) |
|
1063 sse_version = 3; |
|
1064 else if (d & SSE2_EDX_MASK) |
|
1065 sse_version = 2; |
|
1066 else if (d & SSE1_EDX_MASK) |
|
1067 sse_version = 1; |
|
1068 } |
|
1069 |
|
1070 return sse_version; |
|
1071 #else |
|
1072 return 0; |
|
1073 #endif |
|
1074 } |
|
1075 #endif |
|
1076 |
|
1077 static const struct matrix bradford_matrix = {{ { 0.8951f, 0.2664f,-0.1614f}, |
|
1078 {-0.7502f, 1.7135f, 0.0367f}, |
|
1079 { 0.0389f,-0.0685f, 1.0296f}}, |
|
1080 false}; |
|
1081 |
|
1082 static const struct matrix bradford_matrix_inv = {{ { 0.9869929f,-0.1470543f, 0.1599627f}, |
|
1083 { 0.4323053f, 0.5183603f, 0.0492912f}, |
|
1084 {-0.0085287f, 0.0400428f, 0.9684867f}}, |
|
1085 false}; |
|
1086 |
|
1087 // See ICCv4 E.3 |
|
1088 struct matrix compute_whitepoint_adaption(float X, float Y, float Z) { |
|
1089 float p = (0.96422f*bradford_matrix.m[0][0] + 1.000f*bradford_matrix.m[1][0] + 0.82521f*bradford_matrix.m[2][0]) / |
|
1090 (X*bradford_matrix.m[0][0] + Y*bradford_matrix.m[1][0] + Z*bradford_matrix.m[2][0] ); |
|
1091 float y = (0.96422f*bradford_matrix.m[0][1] + 1.000f*bradford_matrix.m[1][1] + 0.82521f*bradford_matrix.m[2][1]) / |
|
1092 (X*bradford_matrix.m[0][1] + Y*bradford_matrix.m[1][1] + Z*bradford_matrix.m[2][1] ); |
|
1093 float b = (0.96422f*bradford_matrix.m[0][2] + 1.000f*bradford_matrix.m[1][2] + 0.82521f*bradford_matrix.m[2][2]) / |
|
1094 (X*bradford_matrix.m[0][2] + Y*bradford_matrix.m[1][2] + Z*bradford_matrix.m[2][2] ); |
|
1095 struct matrix white_adaption = {{ {p,0,0}, {0,y,0}, {0,0,b}}, false}; |
|
1096 return matrix_multiply( bradford_matrix_inv, matrix_multiply(white_adaption, bradford_matrix) ); |
|
1097 } |
|
1098 |
|
1099 void qcms_profile_precache_output_transform(qcms_profile *profile) |
|
1100 { |
|
1101 /* we only support precaching on rgb profiles */ |
|
1102 if (profile->color_space != RGB_SIGNATURE) |
|
1103 return; |
|
1104 |
|
1105 if (qcms_supports_iccv4) { |
|
1106 /* don't precache since we will use the B2A LUT */ |
|
1107 if (profile->B2A0) |
|
1108 return; |
|
1109 |
|
1110 /* don't precache since we will use the mBA LUT */ |
|
1111 if (profile->mBA) |
|
1112 return; |
|
1113 } |
|
1114 |
|
1115 /* don't precache if we do not have the TRC curves */ |
|
1116 if (!profile->redTRC || !profile->greenTRC || !profile->blueTRC) |
|
1117 return; |
|
1118 |
|
1119 if (!profile->output_table_r) { |
|
1120 profile->output_table_r = precache_create(); |
|
1121 if (profile->output_table_r && |
|
1122 !compute_precache(profile->redTRC, profile->output_table_r->data)) { |
|
1123 precache_release(profile->output_table_r); |
|
1124 profile->output_table_r = NULL; |
|
1125 } |
|
1126 } |
|
1127 if (!profile->output_table_g) { |
|
1128 profile->output_table_g = precache_create(); |
|
1129 if (profile->output_table_g && |
|
1130 !compute_precache(profile->greenTRC, profile->output_table_g->data)) { |
|
1131 precache_release(profile->output_table_g); |
|
1132 profile->output_table_g = NULL; |
|
1133 } |
|
1134 } |
|
1135 if (!profile->output_table_b) { |
|
1136 profile->output_table_b = precache_create(); |
|
1137 if (profile->output_table_b && |
|
1138 !compute_precache(profile->blueTRC, profile->output_table_b->data)) { |
|
1139 precache_release(profile->output_table_b); |
|
1140 profile->output_table_b = NULL; |
|
1141 } |
|
1142 } |
|
1143 } |
|
1144 |
|
1145 /* Replace the current transformation with a LUT transformation using a given number of sample points */ |
|
1146 qcms_transform* qcms_transform_precacheLUT_float(qcms_transform *transform, qcms_profile *in, qcms_profile *out, |
|
1147 int samples, qcms_data_type in_type) |
|
1148 { |
|
1149 /* The range between which 2 consecutive sample points can be used to interpolate */ |
|
1150 uint16_t x,y,z; |
|
1151 uint32_t l; |
|
1152 uint32_t lutSize = 3 * samples * samples * samples; |
|
1153 float* src = NULL; |
|
1154 float* dest = NULL; |
|
1155 float* lut = NULL; |
|
1156 |
|
1157 src = malloc(lutSize*sizeof(float)); |
|
1158 dest = malloc(lutSize*sizeof(float)); |
|
1159 |
|
1160 if (src && dest) { |
|
1161 /* Prepare a list of points we want to sample */ |
|
1162 l = 0; |
|
1163 for (x = 0; x < samples; x++) { |
|
1164 for (y = 0; y < samples; y++) { |
|
1165 for (z = 0; z < samples; z++) { |
|
1166 src[l++] = x / (float)(samples-1); |
|
1167 src[l++] = y / (float)(samples-1); |
|
1168 src[l++] = z / (float)(samples-1); |
|
1169 } |
|
1170 } |
|
1171 } |
|
1172 |
|
1173 lut = qcms_chain_transform(in, out, src, dest, lutSize); |
|
1174 if (lut) { |
|
1175 transform->r_clut = &lut[0]; |
|
1176 transform->g_clut = &lut[1]; |
|
1177 transform->b_clut = &lut[2]; |
|
1178 transform->grid_size = samples; |
|
1179 if (in_type == QCMS_DATA_RGBA_8) { |
|
1180 transform->transform_fn = qcms_transform_data_tetra_clut_rgba; |
|
1181 } else { |
|
1182 transform->transform_fn = qcms_transform_data_tetra_clut; |
|
1183 } |
|
1184 } |
|
1185 } |
|
1186 |
|
1187 |
|
1188 //XXX: qcms_modular_transform_data may return either the src or dest buffer. If so it must not be free-ed |
|
1189 if (src && lut != src) { |
|
1190 free(src); |
|
1191 } |
|
1192 if (dest && lut != dest) { |
|
1193 free(dest); |
|
1194 } |
|
1195 |
|
1196 if (lut == NULL) { |
|
1197 return NULL; |
|
1198 } |
|
1199 return transform; |
|
1200 } |
|
1201 |
|
1202 #define NO_MEM_TRANSFORM NULL |
|
1203 |
|
1204 qcms_transform* qcms_transform_create( |
|
1205 qcms_profile *in, qcms_data_type in_type, |
|
1206 qcms_profile *out, qcms_data_type out_type, |
|
1207 qcms_intent intent) |
|
1208 { |
|
1209 bool precache = false; |
|
1210 |
|
1211 qcms_transform *transform = transform_alloc(); |
|
1212 if (!transform) { |
|
1213 return NULL; |
|
1214 } |
|
1215 if (out_type != QCMS_DATA_RGB_8 && |
|
1216 out_type != QCMS_DATA_RGBA_8) { |
|
1217 assert(0 && "output type"); |
|
1218 transform_free(transform); |
|
1219 return NULL; |
|
1220 } |
|
1221 |
|
1222 if (out->output_table_r && |
|
1223 out->output_table_g && |
|
1224 out->output_table_b) { |
|
1225 precache = true; |
|
1226 } |
|
1227 |
|
1228 // This precache assumes RGB_SIGNATURE (fails on GRAY_SIGNATURE, for instance) |
|
1229 if (qcms_supports_iccv4 && |
|
1230 (in_type == QCMS_DATA_RGB_8 || in_type == QCMS_DATA_RGBA_8) && |
|
1231 (in->A2B0 || out->B2A0 || in->mAB || out->mAB)) |
|
1232 { |
|
1233 // Precache the transformation to a CLUT 33x33x33 in size. |
|
1234 // 33 is used by many profiles and works well in pratice. |
|
1235 // This evenly divides 256 into blocks of 8x8x8. |
|
1236 // TODO For transforming small data sets of about 200x200 or less |
|
1237 // precaching should be avoided. |
|
1238 qcms_transform *result = qcms_transform_precacheLUT_float(transform, in, out, 33, in_type); |
|
1239 if (!result) { |
|
1240 assert(0 && "precacheLUT failed"); |
|
1241 transform_free(transform); |
|
1242 return NULL; |
|
1243 } |
|
1244 return result; |
|
1245 } |
|
1246 |
|
1247 if (precache) { |
|
1248 transform->output_table_r = precache_reference(out->output_table_r); |
|
1249 transform->output_table_g = precache_reference(out->output_table_g); |
|
1250 transform->output_table_b = precache_reference(out->output_table_b); |
|
1251 } else { |
|
1252 if (!out->redTRC || !out->greenTRC || !out->blueTRC) { |
|
1253 qcms_transform_release(transform); |
|
1254 return NO_MEM_TRANSFORM; |
|
1255 } |
|
1256 build_output_lut(out->redTRC, &transform->output_gamma_lut_r, &transform->output_gamma_lut_r_length); |
|
1257 build_output_lut(out->greenTRC, &transform->output_gamma_lut_g, &transform->output_gamma_lut_g_length); |
|
1258 build_output_lut(out->blueTRC, &transform->output_gamma_lut_b, &transform->output_gamma_lut_b_length); |
|
1259 if (!transform->output_gamma_lut_r || !transform->output_gamma_lut_g || !transform->output_gamma_lut_b) { |
|
1260 qcms_transform_release(transform); |
|
1261 return NO_MEM_TRANSFORM; |
|
1262 } |
|
1263 } |
|
1264 |
|
1265 if (in->color_space == RGB_SIGNATURE) { |
|
1266 struct matrix in_matrix, out_matrix, result; |
|
1267 |
|
1268 if (in_type != QCMS_DATA_RGB_8 && |
|
1269 in_type != QCMS_DATA_RGBA_8){ |
|
1270 assert(0 && "input type"); |
|
1271 transform_free(transform); |
|
1272 return NULL; |
|
1273 } |
|
1274 if (precache) { |
|
1275 #ifdef X86 |
|
1276 if (sse_version_available() >= 2) { |
|
1277 if (in_type == QCMS_DATA_RGB_8) |
|
1278 transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2; |
|
1279 else |
|
1280 transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2; |
|
1281 |
|
1282 #if !(defined(_MSC_VER) && defined(_M_AMD64)) |
|
1283 /* Microsoft Compiler for x64 doesn't support MMX. |
|
1284 * SSE code uses MMX so that we disable on x64 */ |
|
1285 } else |
|
1286 if (sse_version_available() >= 1) { |
|
1287 if (in_type == QCMS_DATA_RGB_8) |
|
1288 transform->transform_fn = qcms_transform_data_rgb_out_lut_sse1; |
|
1289 else |
|
1290 transform->transform_fn = qcms_transform_data_rgba_out_lut_sse1; |
|
1291 #endif |
|
1292 } else |
|
1293 #endif |
|
1294 #if (defined(__POWERPC__) || defined(__powerpc__)) |
|
1295 if (have_altivec()) { |
|
1296 if (in_type == QCMS_DATA_RGB_8) |
|
1297 transform->transform_fn = qcms_transform_data_rgb_out_lut_altivec; |
|
1298 else |
|
1299 transform->transform_fn = qcms_transform_data_rgba_out_lut_altivec; |
|
1300 } else |
|
1301 #endif |
|
1302 { |
|
1303 if (in_type == QCMS_DATA_RGB_8) |
|
1304 transform->transform_fn = qcms_transform_data_rgb_out_lut_precache; |
|
1305 else |
|
1306 transform->transform_fn = qcms_transform_data_rgba_out_lut_precache; |
|
1307 } |
|
1308 } else { |
|
1309 if (in_type == QCMS_DATA_RGB_8) |
|
1310 transform->transform_fn = qcms_transform_data_rgb_out_lut; |
|
1311 else |
|
1312 transform->transform_fn = qcms_transform_data_rgba_out_lut; |
|
1313 } |
|
1314 |
|
1315 //XXX: avoid duplicating tables if we can |
|
1316 transform->input_gamma_table_r = build_input_gamma_table(in->redTRC); |
|
1317 transform->input_gamma_table_g = build_input_gamma_table(in->greenTRC); |
|
1318 transform->input_gamma_table_b = build_input_gamma_table(in->blueTRC); |
|
1319 if (!transform->input_gamma_table_r || !transform->input_gamma_table_g || !transform->input_gamma_table_b) { |
|
1320 qcms_transform_release(transform); |
|
1321 return NO_MEM_TRANSFORM; |
|
1322 } |
|
1323 |
|
1324 |
|
1325 /* build combined colorant matrix */ |
|
1326 in_matrix = build_colorant_matrix(in); |
|
1327 out_matrix = build_colorant_matrix(out); |
|
1328 out_matrix = matrix_invert(out_matrix); |
|
1329 if (out_matrix.invalid) { |
|
1330 qcms_transform_release(transform); |
|
1331 return NULL; |
|
1332 } |
|
1333 result = matrix_multiply(out_matrix, in_matrix); |
|
1334 |
|
1335 /* store the results in column major mode |
|
1336 * this makes doing the multiplication with sse easier */ |
|
1337 transform->matrix[0][0] = result.m[0][0]; |
|
1338 transform->matrix[1][0] = result.m[0][1]; |
|
1339 transform->matrix[2][0] = result.m[0][2]; |
|
1340 transform->matrix[0][1] = result.m[1][0]; |
|
1341 transform->matrix[1][1] = result.m[1][1]; |
|
1342 transform->matrix[2][1] = result.m[1][2]; |
|
1343 transform->matrix[0][2] = result.m[2][0]; |
|
1344 transform->matrix[1][2] = result.m[2][1]; |
|
1345 transform->matrix[2][2] = result.m[2][2]; |
|
1346 |
|
1347 } else if (in->color_space == GRAY_SIGNATURE) { |
|
1348 if (in_type != QCMS_DATA_GRAY_8 && |
|
1349 in_type != QCMS_DATA_GRAYA_8){ |
|
1350 assert(0 && "input type"); |
|
1351 transform_free(transform); |
|
1352 return NULL; |
|
1353 } |
|
1354 |
|
1355 transform->input_gamma_table_gray = build_input_gamma_table(in->grayTRC); |
|
1356 if (!transform->input_gamma_table_gray) { |
|
1357 qcms_transform_release(transform); |
|
1358 return NO_MEM_TRANSFORM; |
|
1359 } |
|
1360 |
|
1361 if (precache) { |
|
1362 if (in_type == QCMS_DATA_GRAY_8) { |
|
1363 transform->transform_fn = qcms_transform_data_gray_out_precache; |
|
1364 } else { |
|
1365 transform->transform_fn = qcms_transform_data_graya_out_precache; |
|
1366 } |
|
1367 } else { |
|
1368 if (in_type == QCMS_DATA_GRAY_8) { |
|
1369 transform->transform_fn = qcms_transform_data_gray_out_lut; |
|
1370 } else { |
|
1371 transform->transform_fn = qcms_transform_data_graya_out_lut; |
|
1372 } |
|
1373 } |
|
1374 } else { |
|
1375 assert(0 && "unexpected colorspace"); |
|
1376 transform_free(transform); |
|
1377 return NULL; |
|
1378 } |
|
1379 return transform; |
|
1380 } |
|
1381 |
|
1382 #if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) |
|
1383 /* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */ |
|
1384 __attribute__((__force_align_arg_pointer__)) |
|
1385 #endif |
|
1386 void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length) |
|
1387 { |
|
1388 transform->transform_fn(transform, src, dest, length); |
|
1389 } |
|
1390 |
|
1391 qcms_bool qcms_supports_iccv4; |
|
1392 void qcms_enable_iccv4() |
|
1393 { |
|
1394 qcms_supports_iccv4 = true; |
|
1395 } |