Tue, 06 Jan 2015 21:39:09 +0100
Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
michael@0 | 1 | /* vim: set ts=8 sw=8 noexpandtab: */ |
michael@0 | 2 | // qcms |
michael@0 | 3 | // Copyright (C) 2009 Mozilla Corporation |
michael@0 | 4 | // Copyright (C) 1998-2007 Marti Maria |
michael@0 | 5 | // |
michael@0 | 6 | // Permission is hereby granted, free of charge, to any person obtaining |
michael@0 | 7 | // a copy of this software and associated documentation files (the "Software"), |
michael@0 | 8 | // to deal in the Software without restriction, including without limitation |
michael@0 | 9 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, |
michael@0 | 10 | // and/or sell copies of the Software, and to permit persons to whom the Software |
michael@0 | 11 | // is furnished to do so, subject to the following conditions: |
michael@0 | 12 | // |
michael@0 | 13 | // The above copyright notice and this permission notice shall be included in |
michael@0 | 14 | // all copies or substantial portions of the Software. |
michael@0 | 15 | // |
michael@0 | 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
michael@0 | 17 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO |
michael@0 | 18 | // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
michael@0 | 19 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
michael@0 | 20 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
michael@0 | 21 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
michael@0 | 22 | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
michael@0 | 23 | |
michael@0 | 24 | #include <stdlib.h> |
michael@0 | 25 | #include <math.h> |
michael@0 | 26 | #include <assert.h> |
michael@0 | 27 | #include <string.h> //memcpy |
michael@0 | 28 | #include "qcmsint.h" |
michael@0 | 29 | #include "chain.h" |
michael@0 | 30 | #include "matrix.h" |
michael@0 | 31 | #include "transform_util.h" |
michael@0 | 32 | |
michael@0 | 33 | /* for MSVC, GCC, Intel, and Sun compilers */ |
michael@0 | 34 | #if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_M_AMD64) || defined(__x86_64__) || defined(__x86_64) |
michael@0 | 35 | #define X86 |
michael@0 | 36 | #endif /* _M_IX86 || __i386__ || __i386 || _M_AMD64 || __x86_64__ || __x86_64 */ |
michael@0 | 37 | |
michael@0 | 38 | /** |
michael@0 | 39 | * AltiVec detection for PowerPC CPUs |
michael@0 | 40 | * If a runtime detection method is available for the platform, use it. |
michael@0 | 41 | * Otherwise statically choose the AltiVec path when the compiler |
michael@0 | 42 | * was told to build with AltiVec support. |
michael@0 | 43 | */ |
michael@0 | 44 | #if (defined(__POWERPC__) || defined(__powerpc__)) |
michael@0 | 45 | #if defined(__linux__) |
michael@0 | 46 | #include <unistd.h> |
michael@0 | 47 | #include <fcntl.h> |
michael@0 | 48 | #include <stdio.h> |
michael@0 | 49 | #include <elf.h> |
michael@0 | 50 | #include <linux/auxvec.h> |
michael@0 | 51 | #include <asm/cputable.h> |
michael@0 | 52 | #include <link.h> |
michael@0 | 53 | |
michael@0 | 54 | static inline qcms_bool have_altivec() { |
michael@0 | 55 | static int available = -1; |
michael@0 | 56 | int new_avail = 0; |
michael@0 | 57 | ElfW(auxv_t) auxv; |
michael@0 | 58 | ssize_t count; |
michael@0 | 59 | int fd, i; |
michael@0 | 60 | |
michael@0 | 61 | if (available != -1) |
michael@0 | 62 | return (available != 0 ? true : false); |
michael@0 | 63 | |
michael@0 | 64 | fd = open("/proc/self/auxv", O_RDONLY); |
michael@0 | 65 | if (fd < 0) |
michael@0 | 66 | goto out; |
michael@0 | 67 | do { |
michael@0 | 68 | count = read(fd, &auxv, sizeof(auxv)); |
michael@0 | 69 | if (count < 0) |
michael@0 | 70 | goto out_close; |
michael@0 | 71 | |
michael@0 | 72 | if (auxv.a_type == AT_HWCAP) { |
michael@0 | 73 | new_avail = !!(auxv.a_un.a_val & PPC_FEATURE_HAS_ALTIVEC); |
michael@0 | 74 | goto out_close; |
michael@0 | 75 | } |
michael@0 | 76 | } while (auxv.a_type != AT_NULL); |
michael@0 | 77 | |
michael@0 | 78 | out_close: |
michael@0 | 79 | close(fd); |
michael@0 | 80 | out: |
michael@0 | 81 | available = new_avail; |
michael@0 | 82 | return (available != 0 ? true : false); |
michael@0 | 83 | } |
michael@0 | 84 | #elif defined(__APPLE__) && defined(__MACH__) |
michael@0 | 85 | #include <sys/sysctl.h> |
michael@0 | 86 | |
michael@0 | 87 | /** |
michael@0 | 88 | * rip-off from ffmpeg AltiVec detection code. |
michael@0 | 89 | * this code also appears on Apple's AltiVec pages. |
michael@0 | 90 | */ |
michael@0 | 91 | static inline qcms_bool have_altivec() { |
michael@0 | 92 | int sels[2] = {CTL_HW, HW_VECTORUNIT}; |
michael@0 | 93 | static int available = -1; |
michael@0 | 94 | size_t len = sizeof(available); |
michael@0 | 95 | int err; |
michael@0 | 96 | |
michael@0 | 97 | if (available != -1) |
michael@0 | 98 | return (available != 0 ? true : false); |
michael@0 | 99 | |
michael@0 | 100 | err = sysctl(sels, 2, &available, &len, NULL, 0); |
michael@0 | 101 | |
michael@0 | 102 | if (err == 0) |
michael@0 | 103 | if (available != 0) |
michael@0 | 104 | return true; |
michael@0 | 105 | |
michael@0 | 106 | return false; |
michael@0 | 107 | } |
michael@0 | 108 | #elif defined(__ALTIVEC__) || defined(__APPLE_ALTIVEC__) |
michael@0 | 109 | #define have_altivec() true |
michael@0 | 110 | #else |
michael@0 | 111 | #define have_altivec() false |
michael@0 | 112 | #endif |
michael@0 | 113 | #endif // (defined(__POWERPC__) || defined(__powerpc__)) |
michael@0 | 114 | |
michael@0 | 115 | // Build a White point, primary chromas transfer matrix from RGB to CIE XYZ |
michael@0 | 116 | // This is just an approximation, I am not handling all the non-linear |
michael@0 | 117 | // aspects of the RGB to XYZ process, and assumming that the gamma correction |
michael@0 | 118 | // has transitive property in the tranformation chain. |
michael@0 | 119 | // |
michael@0 | 120 | // the alghoritm: |
michael@0 | 121 | // |
michael@0 | 122 | // - First I build the absolute conversion matrix using |
michael@0 | 123 | // primaries in XYZ. This matrix is next inverted |
michael@0 | 124 | // - Then I eval the source white point across this matrix |
michael@0 | 125 | // obtaining the coeficients of the transformation |
michael@0 | 126 | // - Then, I apply these coeficients to the original matrix |
michael@0 | 127 | static struct matrix build_RGB_to_XYZ_transfer_matrix(qcms_CIE_xyY white, qcms_CIE_xyYTRIPLE primrs) |
michael@0 | 128 | { |
michael@0 | 129 | struct matrix primaries; |
michael@0 | 130 | struct matrix primaries_invert; |
michael@0 | 131 | struct matrix result; |
michael@0 | 132 | struct vector white_point; |
michael@0 | 133 | struct vector coefs; |
michael@0 | 134 | |
michael@0 | 135 | double xn, yn; |
michael@0 | 136 | double xr, yr; |
michael@0 | 137 | double xg, yg; |
michael@0 | 138 | double xb, yb; |
michael@0 | 139 | |
michael@0 | 140 | xn = white.x; |
michael@0 | 141 | yn = white.y; |
michael@0 | 142 | |
michael@0 | 143 | if (yn == 0.0) |
michael@0 | 144 | return matrix_invalid(); |
michael@0 | 145 | |
michael@0 | 146 | xr = primrs.red.x; |
michael@0 | 147 | yr = primrs.red.y; |
michael@0 | 148 | xg = primrs.green.x; |
michael@0 | 149 | yg = primrs.green.y; |
michael@0 | 150 | xb = primrs.blue.x; |
michael@0 | 151 | yb = primrs.blue.y; |
michael@0 | 152 | |
michael@0 | 153 | primaries.m[0][0] = xr; |
michael@0 | 154 | primaries.m[0][1] = xg; |
michael@0 | 155 | primaries.m[0][2] = xb; |
michael@0 | 156 | |
michael@0 | 157 | primaries.m[1][0] = yr; |
michael@0 | 158 | primaries.m[1][1] = yg; |
michael@0 | 159 | primaries.m[1][2] = yb; |
michael@0 | 160 | |
michael@0 | 161 | primaries.m[2][0] = 1 - xr - yr; |
michael@0 | 162 | primaries.m[2][1] = 1 - xg - yg; |
michael@0 | 163 | primaries.m[2][2] = 1 - xb - yb; |
michael@0 | 164 | primaries.invalid = false; |
michael@0 | 165 | |
michael@0 | 166 | white_point.v[0] = xn/yn; |
michael@0 | 167 | white_point.v[1] = 1.; |
michael@0 | 168 | white_point.v[2] = (1.0-xn-yn)/yn; |
michael@0 | 169 | |
michael@0 | 170 | primaries_invert = matrix_invert(primaries); |
michael@0 | 171 | |
michael@0 | 172 | coefs = matrix_eval(primaries_invert, white_point); |
michael@0 | 173 | |
michael@0 | 174 | result.m[0][0] = coefs.v[0]*xr; |
michael@0 | 175 | result.m[0][1] = coefs.v[1]*xg; |
michael@0 | 176 | result.m[0][2] = coefs.v[2]*xb; |
michael@0 | 177 | |
michael@0 | 178 | result.m[1][0] = coefs.v[0]*yr; |
michael@0 | 179 | result.m[1][1] = coefs.v[1]*yg; |
michael@0 | 180 | result.m[1][2] = coefs.v[2]*yb; |
michael@0 | 181 | |
michael@0 | 182 | result.m[2][0] = coefs.v[0]*(1.-xr-yr); |
michael@0 | 183 | result.m[2][1] = coefs.v[1]*(1.-xg-yg); |
michael@0 | 184 | result.m[2][2] = coefs.v[2]*(1.-xb-yb); |
michael@0 | 185 | result.invalid = primaries_invert.invalid; |
michael@0 | 186 | |
michael@0 | 187 | return result; |
michael@0 | 188 | } |
michael@0 | 189 | |
/* A colour expressed as CIE XYZ tristimulus values. */
struct CIE_XYZ {
	double X, Y, Z;
};

/* CIE Illuminant D50 */
static const struct CIE_XYZ D50_XYZ = { /* X */ 0.9642, /* Y */ 1.0000, /* Z */ 0.8249 };
michael@0 | 202 | |
michael@0 | 203 | /* from lcms: xyY2XYZ() |
michael@0 | 204 | * corresponds to argyll: icmYxy2XYZ() */ |
michael@0 | 205 | static struct CIE_XYZ xyY2XYZ(qcms_CIE_xyY source) |
michael@0 | 206 | { |
michael@0 | 207 | struct CIE_XYZ dest; |
michael@0 | 208 | dest.X = (source.x / source.y) * source.Y; |
michael@0 | 209 | dest.Y = source.Y; |
michael@0 | 210 | dest.Z = ((1 - source.x - source.y) / source.y) * source.Y; |
michael@0 | 211 | return dest; |
michael@0 | 212 | } |
michael@0 | 213 | |
michael@0 | 214 | /* from lcms: ComputeChromaticAdaption */ |
michael@0 | 215 | // Compute chromatic adaption matrix using chad as cone matrix |
michael@0 | 216 | static struct matrix |
michael@0 | 217 | compute_chromatic_adaption(struct CIE_XYZ source_white_point, |
michael@0 | 218 | struct CIE_XYZ dest_white_point, |
michael@0 | 219 | struct matrix chad) |
michael@0 | 220 | { |
michael@0 | 221 | struct matrix chad_inv; |
michael@0 | 222 | struct vector cone_source_XYZ, cone_source_rgb; |
michael@0 | 223 | struct vector cone_dest_XYZ, cone_dest_rgb; |
michael@0 | 224 | struct matrix cone, tmp; |
michael@0 | 225 | |
michael@0 | 226 | tmp = chad; |
michael@0 | 227 | chad_inv = matrix_invert(tmp); |
michael@0 | 228 | |
michael@0 | 229 | cone_source_XYZ.v[0] = source_white_point.X; |
michael@0 | 230 | cone_source_XYZ.v[1] = source_white_point.Y; |
michael@0 | 231 | cone_source_XYZ.v[2] = source_white_point.Z; |
michael@0 | 232 | |
michael@0 | 233 | cone_dest_XYZ.v[0] = dest_white_point.X; |
michael@0 | 234 | cone_dest_XYZ.v[1] = dest_white_point.Y; |
michael@0 | 235 | cone_dest_XYZ.v[2] = dest_white_point.Z; |
michael@0 | 236 | |
michael@0 | 237 | cone_source_rgb = matrix_eval(chad, cone_source_XYZ); |
michael@0 | 238 | cone_dest_rgb = matrix_eval(chad, cone_dest_XYZ); |
michael@0 | 239 | |
michael@0 | 240 | cone.m[0][0] = cone_dest_rgb.v[0]/cone_source_rgb.v[0]; |
michael@0 | 241 | cone.m[0][1] = 0; |
michael@0 | 242 | cone.m[0][2] = 0; |
michael@0 | 243 | cone.m[1][0] = 0; |
michael@0 | 244 | cone.m[1][1] = cone_dest_rgb.v[1]/cone_source_rgb.v[1]; |
michael@0 | 245 | cone.m[1][2] = 0; |
michael@0 | 246 | cone.m[2][0] = 0; |
michael@0 | 247 | cone.m[2][1] = 0; |
michael@0 | 248 | cone.m[2][2] = cone_dest_rgb.v[2]/cone_source_rgb.v[2]; |
michael@0 | 249 | cone.invalid = false; |
michael@0 | 250 | |
michael@0 | 251 | // Normalize |
michael@0 | 252 | return matrix_multiply(chad_inv, matrix_multiply(cone, chad)); |
michael@0 | 253 | } |
michael@0 | 254 | |
michael@0 | 255 | /* from lcms: cmsAdaptionMatrix */ |
michael@0 | 256 | // Returns the final chrmatic adaptation from illuminant FromIll to Illuminant ToIll |
michael@0 | 257 | // Bradford is assumed |
michael@0 | 258 | static struct matrix |
michael@0 | 259 | adaption_matrix(struct CIE_XYZ source_illumination, struct CIE_XYZ target_illumination) |
michael@0 | 260 | { |
michael@0 | 261 | struct matrix lam_rigg = {{ // Bradford matrix |
michael@0 | 262 | { 0.8951, 0.2664, -0.1614 }, |
michael@0 | 263 | { -0.7502, 1.7135, 0.0367 }, |
michael@0 | 264 | { 0.0389, -0.0685, 1.0296 } |
michael@0 | 265 | }}; |
michael@0 | 266 | return compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg); |
michael@0 | 267 | } |
michael@0 | 268 | |
michael@0 | 269 | /* from lcms: cmsAdaptMatrixToD50 */ |
michael@0 | 270 | static struct matrix adapt_matrix_to_D50(struct matrix r, qcms_CIE_xyY source_white_pt) |
michael@0 | 271 | { |
michael@0 | 272 | struct CIE_XYZ Dn; |
michael@0 | 273 | struct matrix Bradford; |
michael@0 | 274 | |
michael@0 | 275 | if (source_white_pt.y == 0.0) |
michael@0 | 276 | return matrix_invalid(); |
michael@0 | 277 | |
michael@0 | 278 | Dn = xyY2XYZ(source_white_pt); |
michael@0 | 279 | |
michael@0 | 280 | Bradford = adaption_matrix(Dn, D50_XYZ); |
michael@0 | 281 | return matrix_multiply(Bradford, r); |
michael@0 | 282 | } |
michael@0 | 283 | |
michael@0 | 284 | qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcms_CIE_xyYTRIPLE primaries) |
michael@0 | 285 | { |
michael@0 | 286 | struct matrix colorants; |
michael@0 | 287 | colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries); |
michael@0 | 288 | colorants = adapt_matrix_to_D50(colorants, white_point); |
michael@0 | 289 | |
michael@0 | 290 | if (colorants.invalid) |
michael@0 | 291 | return false; |
michael@0 | 292 | |
michael@0 | 293 | /* note: there's a transpose type of operation going on here */ |
michael@0 | 294 | profile->redColorant.X = double_to_s15Fixed16Number(colorants.m[0][0]); |
michael@0 | 295 | profile->redColorant.Y = double_to_s15Fixed16Number(colorants.m[1][0]); |
michael@0 | 296 | profile->redColorant.Z = double_to_s15Fixed16Number(colorants.m[2][0]); |
michael@0 | 297 | |
michael@0 | 298 | profile->greenColorant.X = double_to_s15Fixed16Number(colorants.m[0][1]); |
michael@0 | 299 | profile->greenColorant.Y = double_to_s15Fixed16Number(colorants.m[1][1]); |
michael@0 | 300 | profile->greenColorant.Z = double_to_s15Fixed16Number(colorants.m[2][1]); |
michael@0 | 301 | |
michael@0 | 302 | profile->blueColorant.X = double_to_s15Fixed16Number(colorants.m[0][2]); |
michael@0 | 303 | profile->blueColorant.Y = double_to_s15Fixed16Number(colorants.m[1][2]); |
michael@0 | 304 | profile->blueColorant.Z = double_to_s15Fixed16Number(colorants.m[2][2]); |
michael@0 | 305 | |
michael@0 | 306 | return true; |
michael@0 | 307 | } |
michael@0 | 308 | |
michael@0 | 309 | qcms_bool get_rgb_colorants(struct matrix *colorants, qcms_CIE_xyY white_point, qcms_CIE_xyYTRIPLE primaries) |
michael@0 | 310 | { |
michael@0 | 311 | *colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries); |
michael@0 | 312 | *colorants = adapt_matrix_to_D50(*colorants, white_point); |
michael@0 | 313 | |
michael@0 | 314 | return (colorants->invalid ? true : false); |
michael@0 | 315 | } |
michael@0 | 316 | |
#if 0
/* Disabled (compiled out by the surrounding #if 0).
 *
 * RGB -> RGB transform that applies the output curve with pow() per pixel:
 * every input byte is linearised through the per-channel input gamma table,
 * the linear triple is multiplied by transform->matrix, each result is
 * raised to the per-channel output gamma and then scaled by 255 and passed
 * through clamp_u8().
 */
static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
{
	int i;
	float (*mat)[4] = transform->matrix;
	for (i=0; i<length; i++) {
		unsigned char device_r = *src++;
		unsigned char device_g = *src++;
		unsigned char device_b = *src++;

		float linear_r = transform->input_gamma_table_r[device_r];
		float linear_g = transform->input_gamma_table_g[device_g];
		float linear_b = transform->input_gamma_table_b[device_b];

		/* mat is indexed [input channel][output channel] */
		float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
		float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
		float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;

		float out_device_r = pow(out_linear_r, transform->out_gamma_r);
		float out_device_g = pow(out_linear_g, transform->out_gamma_g);
		float out_device_b = pow(out_linear_b, transform->out_gamma_b);

		dest[OUTPUT_R_INDEX] = clamp_u8(255*out_device_r);
		dest[OUTPUT_G_INDEX] = clamp_u8(255*out_device_g);
		dest[OUTPUT_B_INDEX] = clamp_u8(255*out_device_b);
		dest += RGB_OUTPUT_COMPONENTS;
	}
}
#endif
michael@0 | 346 | |
michael@0 | 347 | static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
michael@0 | 348 | { |
michael@0 | 349 | unsigned int i; |
michael@0 | 350 | for (i = 0; i < length; i++) { |
michael@0 | 351 | float out_device_r, out_device_g, out_device_b; |
michael@0 | 352 | unsigned char device = *src++; |
michael@0 | 353 | |
michael@0 | 354 | float linear = transform->input_gamma_table_gray[device]; |
michael@0 | 355 | |
michael@0 | 356 | out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length); |
michael@0 | 357 | out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length); |
michael@0 | 358 | out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length); |
michael@0 | 359 | |
michael@0 | 360 | dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255); |
michael@0 | 361 | dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255); |
michael@0 | 362 | dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255); |
michael@0 | 363 | dest += RGB_OUTPUT_COMPONENTS; |
michael@0 | 364 | } |
michael@0 | 365 | } |
michael@0 | 366 | |
michael@0 | 367 | /* Alpha is not corrected. |
michael@0 | 368 | A rationale for this is found in Alvy Ray's "Should Alpha Be Nonlinear If |
michael@0 | 369 | RGB Is?" Tech Memo 17 (December 14, 1998). |
michael@0 | 370 | See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf |
michael@0 | 371 | */ |
michael@0 | 372 | |
michael@0 | 373 | static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
michael@0 | 374 | { |
michael@0 | 375 | unsigned int i; |
michael@0 | 376 | for (i = 0; i < length; i++) { |
michael@0 | 377 | float out_device_r, out_device_g, out_device_b; |
michael@0 | 378 | unsigned char device = *src++; |
michael@0 | 379 | unsigned char alpha = *src++; |
michael@0 | 380 | |
michael@0 | 381 | float linear = transform->input_gamma_table_gray[device]; |
michael@0 | 382 | |
michael@0 | 383 | out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length); |
michael@0 | 384 | out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length); |
michael@0 | 385 | out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length); |
michael@0 | 386 | |
michael@0 | 387 | dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255); |
michael@0 | 388 | dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255); |
michael@0 | 389 | dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255); |
michael@0 | 390 | dest[OUTPUT_A_INDEX] = alpha; |
michael@0 | 391 | dest += RGBA_OUTPUT_COMPONENTS; |
michael@0 | 392 | } |
michael@0 | 393 | } |
michael@0 | 394 | |
michael@0 | 395 | |
michael@0 | 396 | static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
michael@0 | 397 | { |
michael@0 | 398 | unsigned int i; |
michael@0 | 399 | for (i = 0; i < length; i++) { |
michael@0 | 400 | unsigned char device = *src++; |
michael@0 | 401 | uint16_t gray; |
michael@0 | 402 | |
michael@0 | 403 | float linear = transform->input_gamma_table_gray[device]; |
michael@0 | 404 | |
michael@0 | 405 | /* we could round here... */ |
michael@0 | 406 | gray = linear * PRECACHE_OUTPUT_MAX; |
michael@0 | 407 | |
michael@0 | 408 | dest[OUTPUT_R_INDEX] = transform->output_table_r->data[gray]; |
michael@0 | 409 | dest[OUTPUT_G_INDEX] = transform->output_table_g->data[gray]; |
michael@0 | 410 | dest[OUTPUT_B_INDEX] = transform->output_table_b->data[gray]; |
michael@0 | 411 | dest += RGB_OUTPUT_COMPONENTS; |
michael@0 | 412 | } |
michael@0 | 413 | } |
michael@0 | 414 | |
michael@0 | 415 | static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
michael@0 | 416 | { |
michael@0 | 417 | unsigned int i; |
michael@0 | 418 | for (i = 0; i < length; i++) { |
michael@0 | 419 | unsigned char device = *src++; |
michael@0 | 420 | unsigned char alpha = *src++; |
michael@0 | 421 | uint16_t gray; |
michael@0 | 422 | |
michael@0 | 423 | float linear = transform->input_gamma_table_gray[device]; |
michael@0 | 424 | |
michael@0 | 425 | /* we could round here... */ |
michael@0 | 426 | gray = linear * PRECACHE_OUTPUT_MAX; |
michael@0 | 427 | |
michael@0 | 428 | dest[OUTPUT_R_INDEX] = transform->output_table_r->data[gray]; |
michael@0 | 429 | dest[OUTPUT_G_INDEX] = transform->output_table_g->data[gray]; |
michael@0 | 430 | dest[OUTPUT_B_INDEX] = transform->output_table_b->data[gray]; |
michael@0 | 431 | dest[OUTPUT_A_INDEX] = alpha; |
michael@0 | 432 | dest += RGBA_OUTPUT_COMPONENTS; |
michael@0 | 433 | } |
michael@0 | 434 | } |
michael@0 | 435 | |
michael@0 | 436 | static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
michael@0 | 437 | { |
michael@0 | 438 | unsigned int i; |
michael@0 | 439 | float (*mat)[4] = transform->matrix; |
michael@0 | 440 | for (i = 0; i < length; i++) { |
michael@0 | 441 | unsigned char device_r = *src++; |
michael@0 | 442 | unsigned char device_g = *src++; |
michael@0 | 443 | unsigned char device_b = *src++; |
michael@0 | 444 | uint16_t r, g, b; |
michael@0 | 445 | |
michael@0 | 446 | float linear_r = transform->input_gamma_table_r[device_r]; |
michael@0 | 447 | float linear_g = transform->input_gamma_table_g[device_g]; |
michael@0 | 448 | float linear_b = transform->input_gamma_table_b[device_b]; |
michael@0 | 449 | |
michael@0 | 450 | float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; |
michael@0 | 451 | float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; |
michael@0 | 452 | float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; |
michael@0 | 453 | |
michael@0 | 454 | out_linear_r = clamp_float(out_linear_r); |
michael@0 | 455 | out_linear_g = clamp_float(out_linear_g); |
michael@0 | 456 | out_linear_b = clamp_float(out_linear_b); |
michael@0 | 457 | |
michael@0 | 458 | /* we could round here... */ |
michael@0 | 459 | r = out_linear_r * PRECACHE_OUTPUT_MAX; |
michael@0 | 460 | g = out_linear_g * PRECACHE_OUTPUT_MAX; |
michael@0 | 461 | b = out_linear_b * PRECACHE_OUTPUT_MAX; |
michael@0 | 462 | |
michael@0 | 463 | dest[OUTPUT_R_INDEX] = transform->output_table_r->data[r]; |
michael@0 | 464 | dest[OUTPUT_G_INDEX] = transform->output_table_g->data[g]; |
michael@0 | 465 | dest[OUTPUT_B_INDEX] = transform->output_table_b->data[b]; |
michael@0 | 466 | dest += RGB_OUTPUT_COMPONENTS; |
michael@0 | 467 | } |
michael@0 | 468 | } |
michael@0 | 469 | |
michael@0 | 470 | static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
michael@0 | 471 | { |
michael@0 | 472 | unsigned int i; |
michael@0 | 473 | float (*mat)[4] = transform->matrix; |
michael@0 | 474 | for (i = 0; i < length; i++) { |
michael@0 | 475 | unsigned char device_r = *src++; |
michael@0 | 476 | unsigned char device_g = *src++; |
michael@0 | 477 | unsigned char device_b = *src++; |
michael@0 | 478 | unsigned char alpha = *src++; |
michael@0 | 479 | uint16_t r, g, b; |
michael@0 | 480 | |
michael@0 | 481 | float linear_r = transform->input_gamma_table_r[device_r]; |
michael@0 | 482 | float linear_g = transform->input_gamma_table_g[device_g]; |
michael@0 | 483 | float linear_b = transform->input_gamma_table_b[device_b]; |
michael@0 | 484 | |
michael@0 | 485 | float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; |
michael@0 | 486 | float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; |
michael@0 | 487 | float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; |
michael@0 | 488 | |
michael@0 | 489 | out_linear_r = clamp_float(out_linear_r); |
michael@0 | 490 | out_linear_g = clamp_float(out_linear_g); |
michael@0 | 491 | out_linear_b = clamp_float(out_linear_b); |
michael@0 | 492 | |
michael@0 | 493 | /* we could round here... */ |
michael@0 | 494 | r = out_linear_r * PRECACHE_OUTPUT_MAX; |
michael@0 | 495 | g = out_linear_g * PRECACHE_OUTPUT_MAX; |
michael@0 | 496 | b = out_linear_b * PRECACHE_OUTPUT_MAX; |
michael@0 | 497 | |
michael@0 | 498 | dest[OUTPUT_R_INDEX] = transform->output_table_r->data[r]; |
michael@0 | 499 | dest[OUTPUT_G_INDEX] = transform->output_table_g->data[g]; |
michael@0 | 500 | dest[OUTPUT_B_INDEX] = transform->output_table_b->data[b]; |
michael@0 | 501 | dest[OUTPUT_A_INDEX] = alpha; |
michael@0 | 502 | dest += RGBA_OUTPUT_COMPONENTS; |
michael@0 | 503 | } |
michael@0 | 504 | } |
michael@0 | 505 | |
michael@0 | 506 | // Not used |
michael@0 | 507 | /* |
michael@0 | 508 | static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) { |
michael@0 | 509 | unsigned int i; |
michael@0 | 510 | int xy_len = 1; |
michael@0 | 511 | int x_len = transform->grid_size; |
michael@0 | 512 | int len = x_len * x_len; |
michael@0 | 513 | float* r_table = transform->r_clut; |
michael@0 | 514 | float* g_table = transform->g_clut; |
michael@0 | 515 | float* b_table = transform->b_clut; |
michael@0 | 516 | |
michael@0 | 517 | for (i = 0; i < length; i++) { |
michael@0 | 518 | unsigned char in_r = *src++; |
michael@0 | 519 | unsigned char in_g = *src++; |
michael@0 | 520 | unsigned char in_b = *src++; |
michael@0 | 521 | float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f; |
michael@0 | 522 | |
michael@0 | 523 | int x = floorf(linear_r * (transform->grid_size-1)); |
michael@0 | 524 | int y = floorf(linear_g * (transform->grid_size-1)); |
michael@0 | 525 | int z = floorf(linear_b * (transform->grid_size-1)); |
michael@0 | 526 | int x_n = ceilf(linear_r * (transform->grid_size-1)); |
michael@0 | 527 | int y_n = ceilf(linear_g * (transform->grid_size-1)); |
michael@0 | 528 | int z_n = ceilf(linear_b * (transform->grid_size-1)); |
michael@0 | 529 | float x_d = linear_r * (transform->grid_size-1) - x; |
michael@0 | 530 | float y_d = linear_g * (transform->grid_size-1) - y; |
michael@0 | 531 | float z_d = linear_b * (transform->grid_size-1) - z; |
michael@0 | 532 | |
michael@0 | 533 | float r_x1 = lerp(CLU(r_table,x,y,z), CLU(r_table,x_n,y,z), x_d); |
michael@0 | 534 | float r_x2 = lerp(CLU(r_table,x,y_n,z), CLU(r_table,x_n,y_n,z), x_d); |
michael@0 | 535 | float r_y1 = lerp(r_x1, r_x2, y_d); |
michael@0 | 536 | float r_x3 = lerp(CLU(r_table,x,y,z_n), CLU(r_table,x_n,y,z_n), x_d); |
michael@0 | 537 | float r_x4 = lerp(CLU(r_table,x,y_n,z_n), CLU(r_table,x_n,y_n,z_n), x_d); |
michael@0 | 538 | float r_y2 = lerp(r_x3, r_x4, y_d); |
michael@0 | 539 | float clut_r = lerp(r_y1, r_y2, z_d); |
michael@0 | 540 | |
michael@0 | 541 | float g_x1 = lerp(CLU(g_table,x,y,z), CLU(g_table,x_n,y,z), x_d); |
michael@0 | 542 | float g_x2 = lerp(CLU(g_table,x,y_n,z), CLU(g_table,x_n,y_n,z), x_d); |
michael@0 | 543 | float g_y1 = lerp(g_x1, g_x2, y_d); |
michael@0 | 544 | float g_x3 = lerp(CLU(g_table,x,y,z_n), CLU(g_table,x_n,y,z_n), x_d); |
michael@0 | 545 | float g_x4 = lerp(CLU(g_table,x,y_n,z_n), CLU(g_table,x_n,y_n,z_n), x_d); |
michael@0 | 546 | float g_y2 = lerp(g_x3, g_x4, y_d); |
michael@0 | 547 | float clut_g = lerp(g_y1, g_y2, z_d); |
michael@0 | 548 | |
michael@0 | 549 | float b_x1 = lerp(CLU(b_table,x,y,z), CLU(b_table,x_n,y,z), x_d); |
michael@0 | 550 | float b_x2 = lerp(CLU(b_table,x,y_n,z), CLU(b_table,x_n,y_n,z), x_d); |
michael@0 | 551 | float b_y1 = lerp(b_x1, b_x2, y_d); |
michael@0 | 552 | float b_x3 = lerp(CLU(b_table,x,y,z_n), CLU(b_table,x_n,y,z_n), x_d); |
michael@0 | 553 | float b_x4 = lerp(CLU(b_table,x,y_n,z_n), CLU(b_table,x_n,y_n,z_n), x_d); |
michael@0 | 554 | float b_y2 = lerp(b_x3, b_x4, y_d); |
michael@0 | 555 | float clut_b = lerp(b_y1, b_y2, z_d); |
michael@0 | 556 | |
michael@0 | 557 | *dest++ = clamp_u8(clut_r*255.0f); |
michael@0 | 558 | *dest++ = clamp_u8(clut_g*255.0f); |
michael@0 | 559 | *dest++ = clamp_u8(clut_b*255.0f); |
michael@0 | 560 | } |
michael@0 | 561 | } |
michael@0 | 562 | */ |
michael@0 | 563 | |
/**
 * Integer division rounded towards positive infinity.
 *
 * @param value dividend (any int)
 * @param div   divisor, must be greater than zero
 * @return the smallest integer that is >= value / div
 *
 * Written without the usual `value + div - 1` trick: that sum overflows
 * (undefined behaviour) for large dividends and yields the wrong result for
 * negative ones.
 */
static int int_div_ceil(int value, int div) {
	int quot = value / div;

	/* C division truncates towards zero, so only a positive remainder
	 * means the exact quotient lies above quot. */
	if (value % div > 0)
		quot++;

	return quot;
}
michael@0 | 567 | |
michael@0 | 568 | // Using lcms' tetra interpolation algorithm. |
michael@0 | 569 | static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) { |
michael@0 | 570 | unsigned int i; |
michael@0 | 571 | int xy_len = 1; |
michael@0 | 572 | int x_len = transform->grid_size; |
michael@0 | 573 | int len = x_len * x_len; |
michael@0 | 574 | float* r_table = transform->r_clut; |
michael@0 | 575 | float* g_table = transform->g_clut; |
michael@0 | 576 | float* b_table = transform->b_clut; |
michael@0 | 577 | float c0_r, c1_r, c2_r, c3_r; |
michael@0 | 578 | float c0_g, c1_g, c2_g, c3_g; |
michael@0 | 579 | float c0_b, c1_b, c2_b, c3_b; |
michael@0 | 580 | float clut_r, clut_g, clut_b; |
michael@0 | 581 | for (i = 0; i < length; i++) { |
michael@0 | 582 | unsigned char in_r = *src++; |
michael@0 | 583 | unsigned char in_g = *src++; |
michael@0 | 584 | unsigned char in_b = *src++; |
michael@0 | 585 | unsigned char in_a = *src++; |
michael@0 | 586 | float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f; |
michael@0 | 587 | |
michael@0 | 588 | int x = in_r * (transform->grid_size-1) / 255; |
michael@0 | 589 | int y = in_g * (transform->grid_size-1) / 255; |
michael@0 | 590 | int z = in_b * (transform->grid_size-1) / 255; |
michael@0 | 591 | int x_n = int_div_ceil(in_r * (transform->grid_size-1), 255); |
michael@0 | 592 | int y_n = int_div_ceil(in_g * (transform->grid_size-1), 255); |
michael@0 | 593 | int z_n = int_div_ceil(in_b * (transform->grid_size-1), 255); |
michael@0 | 594 | float rx = linear_r * (transform->grid_size-1) - x; |
michael@0 | 595 | float ry = linear_g * (transform->grid_size-1) - y; |
michael@0 | 596 | float rz = linear_b * (transform->grid_size-1) - z; |
michael@0 | 597 | |
michael@0 | 598 | c0_r = CLU(r_table, x, y, z); |
michael@0 | 599 | c0_g = CLU(g_table, x, y, z); |
michael@0 | 600 | c0_b = CLU(b_table, x, y, z); |
michael@0 | 601 | |
michael@0 | 602 | if( rx >= ry ) { |
michael@0 | 603 | if (ry >= rz) { //rx >= ry && ry >= rz |
michael@0 | 604 | c1_r = CLU(r_table, x_n, y, z) - c0_r; |
michael@0 | 605 | c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z); |
michael@0 | 606 | c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); |
michael@0 | 607 | c1_g = CLU(g_table, x_n, y, z) - c0_g; |
michael@0 | 608 | c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z); |
michael@0 | 609 | c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); |
michael@0 | 610 | c1_b = CLU(b_table, x_n, y, z) - c0_b; |
michael@0 | 611 | c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z); |
michael@0 | 612 | c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); |
michael@0 | 613 | } else { |
michael@0 | 614 | if (rx >= rz) { //rx >= rz && rz >= ry |
michael@0 | 615 | c1_r = CLU(r_table, x_n, y, z) - c0_r; |
michael@0 | 616 | c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); |
michael@0 | 617 | c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z); |
michael@0 | 618 | c1_g = CLU(g_table, x_n, y, z) - c0_g; |
michael@0 | 619 | c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); |
michael@0 | 620 | c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z); |
michael@0 | 621 | c1_b = CLU(b_table, x_n, y, z) - c0_b; |
michael@0 | 622 | c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); |
michael@0 | 623 | c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z); |
michael@0 | 624 | } else { //rz > rx && rx >= ry |
michael@0 | 625 | c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n); |
michael@0 | 626 | c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); |
michael@0 | 627 | c3_r = CLU(r_table, x, y, z_n) - c0_r; |
michael@0 | 628 | c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n); |
michael@0 | 629 | c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); |
michael@0 | 630 | c3_g = CLU(g_table, x, y, z_n) - c0_g; |
michael@0 | 631 | c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n); |
michael@0 | 632 | c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); |
michael@0 | 633 | c3_b = CLU(b_table, x, y, z_n) - c0_b; |
michael@0 | 634 | } |
michael@0 | 635 | } |
michael@0 | 636 | } else { |
michael@0 | 637 | if (rx >= rz) { //ry > rx && rx >= rz |
michael@0 | 638 | c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z); |
michael@0 | 639 | c2_r = CLU(r_table, x, y_n, z) - c0_r; |
michael@0 | 640 | c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); |
michael@0 | 641 | c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z); |
michael@0 | 642 | c2_g = CLU(g_table, x, y_n, z) - c0_g; |
michael@0 | 643 | c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); |
michael@0 | 644 | c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z); |
michael@0 | 645 | c2_b = CLU(b_table, x, y_n, z) - c0_b; |
michael@0 | 646 | c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); |
michael@0 | 647 | } else { |
michael@0 | 648 | if (ry >= rz) { //ry >= rz && rz > rx |
michael@0 | 649 | c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); |
michael@0 | 650 | c2_r = CLU(r_table, x, y_n, z) - c0_r; |
michael@0 | 651 | c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z); |
michael@0 | 652 | c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); |
michael@0 | 653 | c2_g = CLU(g_table, x, y_n, z) - c0_g; |
michael@0 | 654 | c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z); |
michael@0 | 655 | c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); |
michael@0 | 656 | c2_b = CLU(b_table, x, y_n, z) - c0_b; |
michael@0 | 657 | c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z); |
michael@0 | 658 | } else { //rz > ry && ry > rx |
michael@0 | 659 | c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); |
michael@0 | 660 | c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n); |
michael@0 | 661 | c3_r = CLU(r_table, x, y, z_n) - c0_r; |
michael@0 | 662 | c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); |
michael@0 | 663 | c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n); |
michael@0 | 664 | c3_g = CLU(g_table, x, y, z_n) - c0_g; |
michael@0 | 665 | c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); |
michael@0 | 666 | c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n); |
michael@0 | 667 | c3_b = CLU(b_table, x, y, z_n) - c0_b; |
michael@0 | 668 | } |
michael@0 | 669 | } |
michael@0 | 670 | } |
michael@0 | 671 | |
michael@0 | 672 | clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz; |
michael@0 | 673 | clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz; |
michael@0 | 674 | clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz; |
michael@0 | 675 | |
michael@0 | 676 | dest[OUTPUT_R_INDEX] = clamp_u8(clut_r*255.0f); |
michael@0 | 677 | dest[OUTPUT_G_INDEX] = clamp_u8(clut_g*255.0f); |
michael@0 | 678 | dest[OUTPUT_B_INDEX] = clamp_u8(clut_b*255.0f); |
michael@0 | 679 | dest[OUTPUT_A_INDEX] = in_a; |
michael@0 | 680 | dest += RGBA_OUTPUT_COMPONENTS; |
michael@0 | 681 | } |
michael@0 | 682 | } |
michael@0 | 683 | |
michael@0 | 684 | // Using lcms' tetra interpolation code. |
michael@0 | 685 | static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) { |
michael@0 | 686 | unsigned int i; |
michael@0 | 687 | int xy_len = 1; |
michael@0 | 688 | int x_len = transform->grid_size; |
michael@0 | 689 | int len = x_len * x_len; |
michael@0 | 690 | float* r_table = transform->r_clut; |
michael@0 | 691 | float* g_table = transform->g_clut; |
michael@0 | 692 | float* b_table = transform->b_clut; |
michael@0 | 693 | float c0_r, c1_r, c2_r, c3_r; |
michael@0 | 694 | float c0_g, c1_g, c2_g, c3_g; |
michael@0 | 695 | float c0_b, c1_b, c2_b, c3_b; |
michael@0 | 696 | float clut_r, clut_g, clut_b; |
michael@0 | 697 | for (i = 0; i < length; i++) { |
michael@0 | 698 | unsigned char in_r = *src++; |
michael@0 | 699 | unsigned char in_g = *src++; |
michael@0 | 700 | unsigned char in_b = *src++; |
michael@0 | 701 | float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f; |
michael@0 | 702 | |
michael@0 | 703 | int x = in_r * (transform->grid_size-1) / 255; |
michael@0 | 704 | int y = in_g * (transform->grid_size-1) / 255; |
michael@0 | 705 | int z = in_b * (transform->grid_size-1) / 255; |
michael@0 | 706 | int x_n = int_div_ceil(in_r * (transform->grid_size-1), 255); |
michael@0 | 707 | int y_n = int_div_ceil(in_g * (transform->grid_size-1), 255); |
michael@0 | 708 | int z_n = int_div_ceil(in_b * (transform->grid_size-1), 255); |
michael@0 | 709 | float rx = linear_r * (transform->grid_size-1) - x; |
michael@0 | 710 | float ry = linear_g * (transform->grid_size-1) - y; |
michael@0 | 711 | float rz = linear_b * (transform->grid_size-1) - z; |
michael@0 | 712 | |
michael@0 | 713 | c0_r = CLU(r_table, x, y, z); |
michael@0 | 714 | c0_g = CLU(g_table, x, y, z); |
michael@0 | 715 | c0_b = CLU(b_table, x, y, z); |
michael@0 | 716 | |
michael@0 | 717 | if( rx >= ry ) { |
michael@0 | 718 | if (ry >= rz) { //rx >= ry && ry >= rz |
michael@0 | 719 | c1_r = CLU(r_table, x_n, y, z) - c0_r; |
michael@0 | 720 | c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z); |
michael@0 | 721 | c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); |
michael@0 | 722 | c1_g = CLU(g_table, x_n, y, z) - c0_g; |
michael@0 | 723 | c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z); |
michael@0 | 724 | c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); |
michael@0 | 725 | c1_b = CLU(b_table, x_n, y, z) - c0_b; |
michael@0 | 726 | c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z); |
michael@0 | 727 | c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); |
michael@0 | 728 | } else { |
michael@0 | 729 | if (rx >= rz) { //rx >= rz && rz >= ry |
michael@0 | 730 | c1_r = CLU(r_table, x_n, y, z) - c0_r; |
michael@0 | 731 | c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); |
michael@0 | 732 | c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z); |
michael@0 | 733 | c1_g = CLU(g_table, x_n, y, z) - c0_g; |
michael@0 | 734 | c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); |
michael@0 | 735 | c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z); |
michael@0 | 736 | c1_b = CLU(b_table, x_n, y, z) - c0_b; |
michael@0 | 737 | c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); |
michael@0 | 738 | c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z); |
michael@0 | 739 | } else { //rz > rx && rx >= ry |
michael@0 | 740 | c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n); |
michael@0 | 741 | c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); |
michael@0 | 742 | c3_r = CLU(r_table, x, y, z_n) - c0_r; |
michael@0 | 743 | c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n); |
michael@0 | 744 | c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); |
michael@0 | 745 | c3_g = CLU(g_table, x, y, z_n) - c0_g; |
michael@0 | 746 | c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n); |
michael@0 | 747 | c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); |
michael@0 | 748 | c3_b = CLU(b_table, x, y, z_n) - c0_b; |
michael@0 | 749 | } |
michael@0 | 750 | } |
michael@0 | 751 | } else { |
michael@0 | 752 | if (rx >= rz) { //ry > rx && rx >= rz |
michael@0 | 753 | c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z); |
michael@0 | 754 | c2_r = CLU(r_table, x, y_n, z) - c0_r; |
michael@0 | 755 | c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); |
michael@0 | 756 | c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z); |
michael@0 | 757 | c2_g = CLU(g_table, x, y_n, z) - c0_g; |
michael@0 | 758 | c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); |
michael@0 | 759 | c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z); |
michael@0 | 760 | c2_b = CLU(b_table, x, y_n, z) - c0_b; |
michael@0 | 761 | c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); |
michael@0 | 762 | } else { |
michael@0 | 763 | if (ry >= rz) { //ry >= rz && rz > rx |
michael@0 | 764 | c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); |
michael@0 | 765 | c2_r = CLU(r_table, x, y_n, z) - c0_r; |
michael@0 | 766 | c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z); |
michael@0 | 767 | c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); |
michael@0 | 768 | c2_g = CLU(g_table, x, y_n, z) - c0_g; |
michael@0 | 769 | c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z); |
michael@0 | 770 | c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); |
michael@0 | 771 | c2_b = CLU(b_table, x, y_n, z) - c0_b; |
michael@0 | 772 | c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z); |
michael@0 | 773 | } else { //rz > ry && ry > rx |
michael@0 | 774 | c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); |
michael@0 | 775 | c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n); |
michael@0 | 776 | c3_r = CLU(r_table, x, y, z_n) - c0_r; |
michael@0 | 777 | c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); |
michael@0 | 778 | c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n); |
michael@0 | 779 | c3_g = CLU(g_table, x, y, z_n) - c0_g; |
michael@0 | 780 | c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); |
michael@0 | 781 | c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n); |
michael@0 | 782 | c3_b = CLU(b_table, x, y, z_n) - c0_b; |
michael@0 | 783 | } |
michael@0 | 784 | } |
michael@0 | 785 | } |
michael@0 | 786 | |
michael@0 | 787 | clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz; |
michael@0 | 788 | clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz; |
michael@0 | 789 | clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz; |
michael@0 | 790 | |
michael@0 | 791 | dest[OUTPUT_R_INDEX] = clamp_u8(clut_r*255.0f); |
michael@0 | 792 | dest[OUTPUT_G_INDEX] = clamp_u8(clut_g*255.0f); |
michael@0 | 793 | dest[OUTPUT_B_INDEX] = clamp_u8(clut_b*255.0f); |
michael@0 | 794 | dest += RGB_OUTPUT_COMPONENTS; |
michael@0 | 795 | } |
michael@0 | 796 | } |
michael@0 | 797 | |
michael@0 | 798 | static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
michael@0 | 799 | { |
michael@0 | 800 | unsigned int i; |
michael@0 | 801 | float (*mat)[4] = transform->matrix; |
michael@0 | 802 | for (i = 0; i < length; i++) { |
michael@0 | 803 | unsigned char device_r = *src++; |
michael@0 | 804 | unsigned char device_g = *src++; |
michael@0 | 805 | unsigned char device_b = *src++; |
michael@0 | 806 | float out_device_r, out_device_g, out_device_b; |
michael@0 | 807 | |
michael@0 | 808 | float linear_r = transform->input_gamma_table_r[device_r]; |
michael@0 | 809 | float linear_g = transform->input_gamma_table_g[device_g]; |
michael@0 | 810 | float linear_b = transform->input_gamma_table_b[device_b]; |
michael@0 | 811 | |
michael@0 | 812 | float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; |
michael@0 | 813 | float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; |
michael@0 | 814 | float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; |
michael@0 | 815 | |
michael@0 | 816 | out_linear_r = clamp_float(out_linear_r); |
michael@0 | 817 | out_linear_g = clamp_float(out_linear_g); |
michael@0 | 818 | out_linear_b = clamp_float(out_linear_b); |
michael@0 | 819 | |
michael@0 | 820 | out_device_r = lut_interp_linear(out_linear_r, |
michael@0 | 821 | transform->output_gamma_lut_r, transform->output_gamma_lut_r_length); |
michael@0 | 822 | out_device_g = lut_interp_linear(out_linear_g, |
michael@0 | 823 | transform->output_gamma_lut_g, transform->output_gamma_lut_g_length); |
michael@0 | 824 | out_device_b = lut_interp_linear(out_linear_b, |
michael@0 | 825 | transform->output_gamma_lut_b, transform->output_gamma_lut_b_length); |
michael@0 | 826 | |
michael@0 | 827 | dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255); |
michael@0 | 828 | dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255); |
michael@0 | 829 | dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255); |
michael@0 | 830 | dest += RGB_OUTPUT_COMPONENTS; |
michael@0 | 831 | } |
michael@0 | 832 | } |
michael@0 | 833 | |
michael@0 | 834 | static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) |
michael@0 | 835 | { |
michael@0 | 836 | unsigned int i; |
michael@0 | 837 | float (*mat)[4] = transform->matrix; |
michael@0 | 838 | for (i = 0; i < length; i++) { |
michael@0 | 839 | unsigned char device_r = *src++; |
michael@0 | 840 | unsigned char device_g = *src++; |
michael@0 | 841 | unsigned char device_b = *src++; |
michael@0 | 842 | unsigned char alpha = *src++; |
michael@0 | 843 | float out_device_r, out_device_g, out_device_b; |
michael@0 | 844 | |
michael@0 | 845 | float linear_r = transform->input_gamma_table_r[device_r]; |
michael@0 | 846 | float linear_g = transform->input_gamma_table_g[device_g]; |
michael@0 | 847 | float linear_b = transform->input_gamma_table_b[device_b]; |
michael@0 | 848 | |
michael@0 | 849 | float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; |
michael@0 | 850 | float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; |
michael@0 | 851 | float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; |
michael@0 | 852 | |
michael@0 | 853 | out_linear_r = clamp_float(out_linear_r); |
michael@0 | 854 | out_linear_g = clamp_float(out_linear_g); |
michael@0 | 855 | out_linear_b = clamp_float(out_linear_b); |
michael@0 | 856 | |
michael@0 | 857 | out_device_r = lut_interp_linear(out_linear_r, |
michael@0 | 858 | transform->output_gamma_lut_r, transform->output_gamma_lut_r_length); |
michael@0 | 859 | out_device_g = lut_interp_linear(out_linear_g, |
michael@0 | 860 | transform->output_gamma_lut_g, transform->output_gamma_lut_g_length); |
michael@0 | 861 | out_device_b = lut_interp_linear(out_linear_b, |
michael@0 | 862 | transform->output_gamma_lut_b, transform->output_gamma_lut_b_length); |
michael@0 | 863 | |
michael@0 | 864 | dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255); |
michael@0 | 865 | dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255); |
michael@0 | 866 | dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255); |
michael@0 | 867 | dest[OUTPUT_A_INDEX] = alpha; |
michael@0 | 868 | dest += RGBA_OUTPUT_COMPONENTS; |
michael@0 | 869 | } |
michael@0 | 870 | } |
michael@0 | 871 | |
#if 0
/*
 * Disabled variant: applies the input gamma tables and the matrix, then
 * writes the linear result scaled to 8 bits without any output curve.
 * Kept compiled out by the surrounding #if 0.
 */
static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
{
	/* size_t, like length: avoids the signed/unsigned comparison of an int counter */
	size_t i;
	float (*mat)[4] = transform->matrix;
	for (i = 0; i < length; i++) {
		unsigned char device_r = *src++;
		unsigned char device_g = *src++;
		unsigned char device_b = *src++;

		float linear_r = transform->input_gamma_table_r[device_r];
		float linear_g = transform->input_gamma_table_g[device_g];
		float linear_b = transform->input_gamma_table_b[device_b];

		float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b;
		float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
		float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;

		*dest++ = clamp_u8(out_linear_r*255);
		*dest++ = clamp_u8(out_linear_g*255);
		*dest++ = clamp_u8(out_linear_b*255);
	}
}
#endif
michael@0 | 896 | |
michael@0 | 897 | /* |
michael@0 | 898 | * If users create and destroy objects on different threads, even if the same |
michael@0 | 899 | * objects aren't used on different threads at the same time, we can still run |
michael@0 | 900 | * in to trouble with refcounts if they aren't atomic. |
michael@0 | 901 | * |
michael@0 | 902 | * This can lead to us prematurely deleting the precache if threads get unlucky |
michael@0 | 903 | * and write the wrong value to the ref count. |
michael@0 | 904 | */ |
michael@0 | 905 | static struct precache_output *precache_reference(struct precache_output *p) |
michael@0 | 906 | { |
michael@0 | 907 | qcms_atomic_increment(p->ref_count); |
michael@0 | 908 | return p; |
michael@0 | 909 | } |
michael@0 | 910 | |
michael@0 | 911 | static struct precache_output *precache_create() |
michael@0 | 912 | { |
michael@0 | 913 | struct precache_output *p = malloc(sizeof(struct precache_output)); |
michael@0 | 914 | if (p) |
michael@0 | 915 | p->ref_count = 1; |
michael@0 | 916 | return p; |
michael@0 | 917 | } |
michael@0 | 918 | |
michael@0 | 919 | void precache_release(struct precache_output *p) |
michael@0 | 920 | { |
michael@0 | 921 | if (qcms_atomic_decrement(p->ref_count) == 0) { |
michael@0 | 922 | free(p); |
michael@0 | 923 | } |
michael@0 | 924 | } |
michael@0 | 925 | |
michael@0 | 926 | #ifdef HAS_POSIX_MEMALIGN |
michael@0 | 927 | static qcms_transform *transform_alloc(void) |
michael@0 | 928 | { |
michael@0 | 929 | qcms_transform *t; |
michael@0 | 930 | if (!posix_memalign(&t, 16, sizeof(*t))) { |
michael@0 | 931 | return t; |
michael@0 | 932 | } else { |
michael@0 | 933 | return NULL; |
michael@0 | 934 | } |
michael@0 | 935 | } |
michael@0 | 936 | static void transform_free(qcms_transform *t) |
michael@0 | 937 | { |
michael@0 | 938 | free(t); |
michael@0 | 939 | } |
michael@0 | 940 | #else |
michael@0 | 941 | static qcms_transform *transform_alloc(void) |
michael@0 | 942 | { |
michael@0 | 943 | /* transform needs to be aligned on a 16byte boundrary */ |
michael@0 | 944 | char *original_block = calloc(sizeof(qcms_transform) + sizeof(void*) + 16, 1); |
michael@0 | 945 | /* make room for a pointer to the block returned by calloc */ |
michael@0 | 946 | void *transform_start = original_block + sizeof(void*); |
michael@0 | 947 | /* align transform_start */ |
michael@0 | 948 | qcms_transform *transform_aligned = (qcms_transform*)(((uintptr_t)transform_start + 15) & ~0xf); |
michael@0 | 949 | |
michael@0 | 950 | /* store a pointer to the block returned by calloc so that we can free it later */ |
michael@0 | 951 | void **(original_block_ptr) = (void**)transform_aligned; |
michael@0 | 952 | if (!original_block) |
michael@0 | 953 | return NULL; |
michael@0 | 954 | original_block_ptr--; |
michael@0 | 955 | *original_block_ptr = original_block; |
michael@0 | 956 | |
michael@0 | 957 | return transform_aligned; |
michael@0 | 958 | } |
michael@0 | 959 | static void transform_free(qcms_transform *t) |
michael@0 | 960 | { |
michael@0 | 961 | /* get at the pointer to the unaligned block returned by calloc */ |
michael@0 | 962 | void **p = (void**)t; |
michael@0 | 963 | p--; |
michael@0 | 964 | free(*p); |
michael@0 | 965 | } |
michael@0 | 966 | #endif |
michael@0 | 967 | |
michael@0 | 968 | void qcms_transform_release(qcms_transform *t) |
michael@0 | 969 | { |
michael@0 | 970 | /* ensure we only free the gamma tables once even if there are |
michael@0 | 971 | * multiple references to the same data */ |
michael@0 | 972 | |
michael@0 | 973 | if (t->output_table_r) |
michael@0 | 974 | precache_release(t->output_table_r); |
michael@0 | 975 | if (t->output_table_g) |
michael@0 | 976 | precache_release(t->output_table_g); |
michael@0 | 977 | if (t->output_table_b) |
michael@0 | 978 | precache_release(t->output_table_b); |
michael@0 | 979 | |
michael@0 | 980 | free(t->input_gamma_table_r); |
michael@0 | 981 | if (t->input_gamma_table_g != t->input_gamma_table_r) |
michael@0 | 982 | free(t->input_gamma_table_g); |
michael@0 | 983 | if (t->input_gamma_table_g != t->input_gamma_table_r && |
michael@0 | 984 | t->input_gamma_table_g != t->input_gamma_table_b) |
michael@0 | 985 | free(t->input_gamma_table_b); |
michael@0 | 986 | |
michael@0 | 987 | free(t->input_gamma_table_gray); |
michael@0 | 988 | |
michael@0 | 989 | free(t->output_gamma_lut_r); |
michael@0 | 990 | free(t->output_gamma_lut_g); |
michael@0 | 991 | free(t->output_gamma_lut_b); |
michael@0 | 992 | |
michael@0 | 993 | transform_free(t); |
michael@0 | 994 | } |
michael@0 | 995 | |
michael@0 | 996 | #ifdef X86 |
michael@0 | 997 | // Determine if we can build with SSE2 (this was partly copied from jmorecfg.h in |
michael@0 | 998 | // mozilla/jpeg) |
michael@0 | 999 | // ------------------------------------------------------------------------- |
#if defined(_M_IX86) && defined(_MSC_VER)
#define HAS_CPUID
/*
 * CPUID wrapper for 32-bit MSVC builds.
 *
 * Executes CPUID with EAX = fxn and stores the resulting EAX/EBX/ECX/EDX in
 * *a, *b, *c and *d. EBX is swapped with ESI around the sequence so that it
 * is not clobbered: EBX is sometimes the PIC register. It is unclear whether
 * that ever matters on Windows, but cpuid is not on the critical path, so
 * the register is preserved to be safe and to stay consistent with the
 * non-Windows version.
 */
static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
       uint32_t eax_out, ebx_out, ecx_out, edx_out;
       __asm {
              xchg   ebx, esi
              mov    eax, fxn
              cpuid
              mov    eax_out, eax
              mov    ebx_out, ebx
              mov    ecx_out, ecx
              mov    edx_out, edx
              xchg   ebx, esi
       }
       *a = eax_out;
       *b = ebx_out;
       *c = ecx_out;
       *d = edx_out;
}
#elif (defined(__GNUC__) || defined(__SUNPRO_C)) && (defined(__i386__) || defined(__i386))
#define HAS_CPUID
/*
 * CPUID wrapper for 32-bit GCC / Sun compiler builds.
 *
 * Executes CPUID with EAX = fxn and stores the resulting EAX/EBX/ECX/EDX in
 * *a, *b, *c and *d. EBX cannot be named as an operand because it is the
 * PIC register on some platforms; it is exchanged with ESI before and after
 * the instruction, so the EBX result is read from ESI and EBX itself is
 * left untouched.
 */
static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {

	uint32_t eax_out, ebx_out, ecx_out, edx_out;
       __asm__ __volatile__ ("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi;" 
                             : "=a" (eax_out), "=S" (ebx_out), "=c" (ecx_out), "=d" (edx_out) : "a" (fxn));
	*a = eax_out;
	*b = ebx_out;
	*c = ecx_out;
	*d = edx_out;
}
#endif
michael@0 | 1038 | |
michael@0 | 1039 | // -------------------------Runtime SSEx Detection----------------------------- |
michael@0 | 1040 | |
/* MMX is always supported per
 *  Gecko v1.9.1 minimum CPU requirements */
/* Feature bits tested in the EDX / ECX results of CPUID function 1 */
#define SSE1_EDX_MASK (1UL << 25)
#define SSE2_EDX_MASK (1UL << 26)
#define SSE3_ECX_MASK (1UL <<  0)

/*
 * Report the highest SSE level found: 3, 2, 1, or 0 for none.
 *
 * On 64-bit x86 builds the answer is fixed at 2 at compile time. With a
 * cpuid() helper available the CPU is queried once and the result is kept
 * in a function-local static. Without either, 0 is returned.
 */
static int sse_version_available(void)
{
#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
	/* we know at build time that 64-bit CPUs always have SSE2
	 * this tells the compiler that non-SSE2 branches will never be
	 * taken (i.e. OK to optimize away the SSE1 and non-SIMD code) */
	return 2;
#elif defined(HAS_CPUID)
	static int sse_version = -1;
	uint32_t eax, ebx, ecx, edx;

	/* already probed on an earlier call */
	if (sse_version != -1)
		return sse_version;

	sse_version = 0;
	cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
	if (ecx & SSE3_ECX_MASK) {
		sse_version = 3;
	} else if (edx & SSE2_EDX_MASK) {
		sse_version = 2;
	} else if (edx & SSE1_EDX_MASK) {
		sse_version = 1;
	}

	return sse_version;
#else
	return 0;
#endif
}
michael@0 | 1075 | #endif |
michael@0 | 1076 | |
michael@0 | 1077 | static const struct matrix bradford_matrix = {{ { 0.8951f, 0.2664f,-0.1614f}, |
michael@0 | 1078 | {-0.7502f, 1.7135f, 0.0367f}, |
michael@0 | 1079 | { 0.0389f,-0.0685f, 1.0296f}}, |
michael@0 | 1080 | false}; |
michael@0 | 1081 | |
michael@0 | 1082 | static const struct matrix bradford_matrix_inv = {{ { 0.9869929f,-0.1470543f, 0.1599627f}, |
michael@0 | 1083 | { 0.4323053f, 0.5183603f, 0.0492912f}, |
michael@0 | 1084 | {-0.0085287f, 0.0400428f, 0.9684867f}}, |
michael@0 | 1085 | false}; |
michael@0 | 1086 | |
michael@0 | 1087 | // See ICCv4 E.3 |
michael@0 | 1088 | struct matrix compute_whitepoint_adaption(float X, float Y, float Z) { |
michael@0 | 1089 | float p = (0.96422f*bradford_matrix.m[0][0] + 1.000f*bradford_matrix.m[1][0] + 0.82521f*bradford_matrix.m[2][0]) / |
michael@0 | 1090 | (X*bradford_matrix.m[0][0] + Y*bradford_matrix.m[1][0] + Z*bradford_matrix.m[2][0] ); |
michael@0 | 1091 | float y = (0.96422f*bradford_matrix.m[0][1] + 1.000f*bradford_matrix.m[1][1] + 0.82521f*bradford_matrix.m[2][1]) / |
michael@0 | 1092 | (X*bradford_matrix.m[0][1] + Y*bradford_matrix.m[1][1] + Z*bradford_matrix.m[2][1] ); |
michael@0 | 1093 | float b = (0.96422f*bradford_matrix.m[0][2] + 1.000f*bradford_matrix.m[1][2] + 0.82521f*bradford_matrix.m[2][2]) / |
michael@0 | 1094 | (X*bradford_matrix.m[0][2] + Y*bradford_matrix.m[1][2] + Z*bradford_matrix.m[2][2] ); |
michael@0 | 1095 | struct matrix white_adaption = {{ {p,0,0}, {0,y,0}, {0,0,b}}, false}; |
michael@0 | 1096 | return matrix_multiply( bradford_matrix_inv, matrix_multiply(white_adaption, bradford_matrix) ); |
michael@0 | 1097 | } |
michael@0 | 1098 | |
michael@0 | 1099 | void qcms_profile_precache_output_transform(qcms_profile *profile) |
michael@0 | 1100 | { |
michael@0 | 1101 | /* we only support precaching on rgb profiles */ |
michael@0 | 1102 | if (profile->color_space != RGB_SIGNATURE) |
michael@0 | 1103 | return; |
michael@0 | 1104 | |
michael@0 | 1105 | if (qcms_supports_iccv4) { |
michael@0 | 1106 | /* don't precache since we will use the B2A LUT */ |
michael@0 | 1107 | if (profile->B2A0) |
michael@0 | 1108 | return; |
michael@0 | 1109 | |
michael@0 | 1110 | /* don't precache since we will use the mBA LUT */ |
michael@0 | 1111 | if (profile->mBA) |
michael@0 | 1112 | return; |
michael@0 | 1113 | } |
michael@0 | 1114 | |
michael@0 | 1115 | /* don't precache if we do not have the TRC curves */ |
michael@0 | 1116 | if (!profile->redTRC || !profile->greenTRC || !profile->blueTRC) |
michael@0 | 1117 | return; |
michael@0 | 1118 | |
michael@0 | 1119 | if (!profile->output_table_r) { |
michael@0 | 1120 | profile->output_table_r = precache_create(); |
michael@0 | 1121 | if (profile->output_table_r && |
michael@0 | 1122 | !compute_precache(profile->redTRC, profile->output_table_r->data)) { |
michael@0 | 1123 | precache_release(profile->output_table_r); |
michael@0 | 1124 | profile->output_table_r = NULL; |
michael@0 | 1125 | } |
michael@0 | 1126 | } |
michael@0 | 1127 | if (!profile->output_table_g) { |
michael@0 | 1128 | profile->output_table_g = precache_create(); |
michael@0 | 1129 | if (profile->output_table_g && |
michael@0 | 1130 | !compute_precache(profile->greenTRC, profile->output_table_g->data)) { |
michael@0 | 1131 | precache_release(profile->output_table_g); |
michael@0 | 1132 | profile->output_table_g = NULL; |
michael@0 | 1133 | } |
michael@0 | 1134 | } |
michael@0 | 1135 | if (!profile->output_table_b) { |
michael@0 | 1136 | profile->output_table_b = precache_create(); |
michael@0 | 1137 | if (profile->output_table_b && |
michael@0 | 1138 | !compute_precache(profile->blueTRC, profile->output_table_b->data)) { |
michael@0 | 1139 | precache_release(profile->output_table_b); |
michael@0 | 1140 | profile->output_table_b = NULL; |
michael@0 | 1141 | } |
michael@0 | 1142 | } |
michael@0 | 1143 | } |
michael@0 | 1144 | |
michael@0 | 1145 | /* Replace the current transformation with a LUT transformation using a given number of sample points */ |
michael@0 | 1146 | qcms_transform* qcms_transform_precacheLUT_float(qcms_transform *transform, qcms_profile *in, qcms_profile *out, |
michael@0 | 1147 | int samples, qcms_data_type in_type) |
michael@0 | 1148 | { |
michael@0 | 1149 | /* The range between which 2 consecutive sample points can be used to interpolate */ |
michael@0 | 1150 | uint16_t x,y,z; |
michael@0 | 1151 | uint32_t l; |
michael@0 | 1152 | uint32_t lutSize = 3 * samples * samples * samples; |
michael@0 | 1153 | float* src = NULL; |
michael@0 | 1154 | float* dest = NULL; |
michael@0 | 1155 | float* lut = NULL; |
michael@0 | 1156 | |
michael@0 | 1157 | src = malloc(lutSize*sizeof(float)); |
michael@0 | 1158 | dest = malloc(lutSize*sizeof(float)); |
michael@0 | 1159 | |
michael@0 | 1160 | if (src && dest) { |
michael@0 | 1161 | /* Prepare a list of points we want to sample */ |
michael@0 | 1162 | l = 0; |
michael@0 | 1163 | for (x = 0; x < samples; x++) { |
michael@0 | 1164 | for (y = 0; y < samples; y++) { |
michael@0 | 1165 | for (z = 0; z < samples; z++) { |
michael@0 | 1166 | src[l++] = x / (float)(samples-1); |
michael@0 | 1167 | src[l++] = y / (float)(samples-1); |
michael@0 | 1168 | src[l++] = z / (float)(samples-1); |
michael@0 | 1169 | } |
michael@0 | 1170 | } |
michael@0 | 1171 | } |
michael@0 | 1172 | |
michael@0 | 1173 | lut = qcms_chain_transform(in, out, src, dest, lutSize); |
michael@0 | 1174 | if (lut) { |
michael@0 | 1175 | transform->r_clut = &lut[0]; |
michael@0 | 1176 | transform->g_clut = &lut[1]; |
michael@0 | 1177 | transform->b_clut = &lut[2]; |
michael@0 | 1178 | transform->grid_size = samples; |
michael@0 | 1179 | if (in_type == QCMS_DATA_RGBA_8) { |
michael@0 | 1180 | transform->transform_fn = qcms_transform_data_tetra_clut_rgba; |
michael@0 | 1181 | } else { |
michael@0 | 1182 | transform->transform_fn = qcms_transform_data_tetra_clut; |
michael@0 | 1183 | } |
michael@0 | 1184 | } |
michael@0 | 1185 | } |
michael@0 | 1186 | |
michael@0 | 1187 | |
michael@0 | 1188 | //XXX: qcms_modular_transform_data may return either the src or dest buffer. If so it must not be free-ed |
michael@0 | 1189 | if (src && lut != src) { |
michael@0 | 1190 | free(src); |
michael@0 | 1191 | } |
michael@0 | 1192 | if (dest && lut != dest) { |
michael@0 | 1193 | free(dest); |
michael@0 | 1194 | } |
michael@0 | 1195 | |
michael@0 | 1196 | if (lut == NULL) { |
michael@0 | 1197 | return NULL; |
michael@0 | 1198 | } |
michael@0 | 1199 | return transform; |
michael@0 | 1200 | } |
michael@0 | 1201 | |
michael@0 | 1202 | #define NO_MEM_TRANSFORM NULL |
michael@0 | 1203 | |
michael@0 | 1204 | qcms_transform* qcms_transform_create( |
michael@0 | 1205 | qcms_profile *in, qcms_data_type in_type, |
michael@0 | 1206 | qcms_profile *out, qcms_data_type out_type, |
michael@0 | 1207 | qcms_intent intent) |
michael@0 | 1208 | { |
michael@0 | 1209 | bool precache = false; |
michael@0 | 1210 | |
michael@0 | 1211 | qcms_transform *transform = transform_alloc(); |
michael@0 | 1212 | if (!transform) { |
michael@0 | 1213 | return NULL; |
michael@0 | 1214 | } |
michael@0 | 1215 | if (out_type != QCMS_DATA_RGB_8 && |
michael@0 | 1216 | out_type != QCMS_DATA_RGBA_8) { |
michael@0 | 1217 | assert(0 && "output type"); |
michael@0 | 1218 | transform_free(transform); |
michael@0 | 1219 | return NULL; |
michael@0 | 1220 | } |
michael@0 | 1221 | |
michael@0 | 1222 | if (out->output_table_r && |
michael@0 | 1223 | out->output_table_g && |
michael@0 | 1224 | out->output_table_b) { |
michael@0 | 1225 | precache = true; |
michael@0 | 1226 | } |
michael@0 | 1227 | |
michael@0 | 1228 | // This precache assumes RGB_SIGNATURE (fails on GRAY_SIGNATURE, for instance) |
michael@0 | 1229 | if (qcms_supports_iccv4 && |
michael@0 | 1230 | (in_type == QCMS_DATA_RGB_8 || in_type == QCMS_DATA_RGBA_8) && |
michael@0 | 1231 | (in->A2B0 || out->B2A0 || in->mAB || out->mAB)) |
michael@0 | 1232 | { |
michael@0 | 1233 | // Precache the transformation to a CLUT 33x33x33 in size. |
michael@0 | 1234 | // 33 is used by many profiles and works well in pratice. |
michael@0 | 1235 | // This evenly divides 256 into blocks of 8x8x8. |
michael@0 | 1236 | // TODO For transforming small data sets of about 200x200 or less |
michael@0 | 1237 | // precaching should be avoided. |
michael@0 | 1238 | qcms_transform *result = qcms_transform_precacheLUT_float(transform, in, out, 33, in_type); |
michael@0 | 1239 | if (!result) { |
michael@0 | 1240 | assert(0 && "precacheLUT failed"); |
michael@0 | 1241 | transform_free(transform); |
michael@0 | 1242 | return NULL; |
michael@0 | 1243 | } |
michael@0 | 1244 | return result; |
michael@0 | 1245 | } |
michael@0 | 1246 | |
michael@0 | 1247 | if (precache) { |
michael@0 | 1248 | transform->output_table_r = precache_reference(out->output_table_r); |
michael@0 | 1249 | transform->output_table_g = precache_reference(out->output_table_g); |
michael@0 | 1250 | transform->output_table_b = precache_reference(out->output_table_b); |
michael@0 | 1251 | } else { |
michael@0 | 1252 | if (!out->redTRC || !out->greenTRC || !out->blueTRC) { |
michael@0 | 1253 | qcms_transform_release(transform); |
michael@0 | 1254 | return NO_MEM_TRANSFORM; |
michael@0 | 1255 | } |
michael@0 | 1256 | build_output_lut(out->redTRC, &transform->output_gamma_lut_r, &transform->output_gamma_lut_r_length); |
michael@0 | 1257 | build_output_lut(out->greenTRC, &transform->output_gamma_lut_g, &transform->output_gamma_lut_g_length); |
michael@0 | 1258 | build_output_lut(out->blueTRC, &transform->output_gamma_lut_b, &transform->output_gamma_lut_b_length); |
michael@0 | 1259 | if (!transform->output_gamma_lut_r || !transform->output_gamma_lut_g || !transform->output_gamma_lut_b) { |
michael@0 | 1260 | qcms_transform_release(transform); |
michael@0 | 1261 | return NO_MEM_TRANSFORM; |
michael@0 | 1262 | } |
michael@0 | 1263 | } |
michael@0 | 1264 | |
michael@0 | 1265 | if (in->color_space == RGB_SIGNATURE) { |
michael@0 | 1266 | struct matrix in_matrix, out_matrix, result; |
michael@0 | 1267 | |
michael@0 | 1268 | if (in_type != QCMS_DATA_RGB_8 && |
michael@0 | 1269 | in_type != QCMS_DATA_RGBA_8){ |
michael@0 | 1270 | assert(0 && "input type"); |
michael@0 | 1271 | transform_free(transform); |
michael@0 | 1272 | return NULL; |
michael@0 | 1273 | } |
michael@0 | 1274 | if (precache) { |
michael@0 | 1275 | #ifdef X86 |
michael@0 | 1276 | if (sse_version_available() >= 2) { |
michael@0 | 1277 | if (in_type == QCMS_DATA_RGB_8) |
michael@0 | 1278 | transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2; |
michael@0 | 1279 | else |
michael@0 | 1280 | transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2; |
michael@0 | 1281 | |
michael@0 | 1282 | #if !(defined(_MSC_VER) && defined(_M_AMD64)) |
michael@0 | 1283 | /* Microsoft Compiler for x64 doesn't support MMX. |
michael@0 | 1284 | * SSE code uses MMX so that we disable on x64 */ |
michael@0 | 1285 | } else |
michael@0 | 1286 | if (sse_version_available() >= 1) { |
michael@0 | 1287 | if (in_type == QCMS_DATA_RGB_8) |
michael@0 | 1288 | transform->transform_fn = qcms_transform_data_rgb_out_lut_sse1; |
michael@0 | 1289 | else |
michael@0 | 1290 | transform->transform_fn = qcms_transform_data_rgba_out_lut_sse1; |
michael@0 | 1291 | #endif |
michael@0 | 1292 | } else |
michael@0 | 1293 | #endif |
michael@0 | 1294 | #if (defined(__POWERPC__) || defined(__powerpc__)) |
michael@0 | 1295 | if (have_altivec()) { |
michael@0 | 1296 | if (in_type == QCMS_DATA_RGB_8) |
michael@0 | 1297 | transform->transform_fn = qcms_transform_data_rgb_out_lut_altivec; |
michael@0 | 1298 | else |
michael@0 | 1299 | transform->transform_fn = qcms_transform_data_rgba_out_lut_altivec; |
michael@0 | 1300 | } else |
michael@0 | 1301 | #endif |
michael@0 | 1302 | { |
michael@0 | 1303 | if (in_type == QCMS_DATA_RGB_8) |
michael@0 | 1304 | transform->transform_fn = qcms_transform_data_rgb_out_lut_precache; |
michael@0 | 1305 | else |
michael@0 | 1306 | transform->transform_fn = qcms_transform_data_rgba_out_lut_precache; |
michael@0 | 1307 | } |
michael@0 | 1308 | } else { |
michael@0 | 1309 | if (in_type == QCMS_DATA_RGB_8) |
michael@0 | 1310 | transform->transform_fn = qcms_transform_data_rgb_out_lut; |
michael@0 | 1311 | else |
michael@0 | 1312 | transform->transform_fn = qcms_transform_data_rgba_out_lut; |
michael@0 | 1313 | } |
michael@0 | 1314 | |
michael@0 | 1315 | //XXX: avoid duplicating tables if we can |
michael@0 | 1316 | transform->input_gamma_table_r = build_input_gamma_table(in->redTRC); |
michael@0 | 1317 | transform->input_gamma_table_g = build_input_gamma_table(in->greenTRC); |
michael@0 | 1318 | transform->input_gamma_table_b = build_input_gamma_table(in->blueTRC); |
michael@0 | 1319 | if (!transform->input_gamma_table_r || !transform->input_gamma_table_g || !transform->input_gamma_table_b) { |
michael@0 | 1320 | qcms_transform_release(transform); |
michael@0 | 1321 | return NO_MEM_TRANSFORM; |
michael@0 | 1322 | } |
michael@0 | 1323 | |
michael@0 | 1324 | |
michael@0 | 1325 | /* build combined colorant matrix */ |
michael@0 | 1326 | in_matrix = build_colorant_matrix(in); |
michael@0 | 1327 | out_matrix = build_colorant_matrix(out); |
michael@0 | 1328 | out_matrix = matrix_invert(out_matrix); |
michael@0 | 1329 | if (out_matrix.invalid) { |
michael@0 | 1330 | qcms_transform_release(transform); |
michael@0 | 1331 | return NULL; |
michael@0 | 1332 | } |
michael@0 | 1333 | result = matrix_multiply(out_matrix, in_matrix); |
michael@0 | 1334 | |
michael@0 | 1335 | /* store the results in column major mode |
michael@0 | 1336 | * this makes doing the multiplication with sse easier */ |
michael@0 | 1337 | transform->matrix[0][0] = result.m[0][0]; |
michael@0 | 1338 | transform->matrix[1][0] = result.m[0][1]; |
michael@0 | 1339 | transform->matrix[2][0] = result.m[0][2]; |
michael@0 | 1340 | transform->matrix[0][1] = result.m[1][0]; |
michael@0 | 1341 | transform->matrix[1][1] = result.m[1][1]; |
michael@0 | 1342 | transform->matrix[2][1] = result.m[1][2]; |
michael@0 | 1343 | transform->matrix[0][2] = result.m[2][0]; |
michael@0 | 1344 | transform->matrix[1][2] = result.m[2][1]; |
michael@0 | 1345 | transform->matrix[2][2] = result.m[2][2]; |
michael@0 | 1346 | |
michael@0 | 1347 | } else if (in->color_space == GRAY_SIGNATURE) { |
michael@0 | 1348 | if (in_type != QCMS_DATA_GRAY_8 && |
michael@0 | 1349 | in_type != QCMS_DATA_GRAYA_8){ |
michael@0 | 1350 | assert(0 && "input type"); |
michael@0 | 1351 | transform_free(transform); |
michael@0 | 1352 | return NULL; |
michael@0 | 1353 | } |
michael@0 | 1354 | |
michael@0 | 1355 | transform->input_gamma_table_gray = build_input_gamma_table(in->grayTRC); |
michael@0 | 1356 | if (!transform->input_gamma_table_gray) { |
michael@0 | 1357 | qcms_transform_release(transform); |
michael@0 | 1358 | return NO_MEM_TRANSFORM; |
michael@0 | 1359 | } |
michael@0 | 1360 | |
michael@0 | 1361 | if (precache) { |
michael@0 | 1362 | if (in_type == QCMS_DATA_GRAY_8) { |
michael@0 | 1363 | transform->transform_fn = qcms_transform_data_gray_out_precache; |
michael@0 | 1364 | } else { |
michael@0 | 1365 | transform->transform_fn = qcms_transform_data_graya_out_precache; |
michael@0 | 1366 | } |
michael@0 | 1367 | } else { |
michael@0 | 1368 | if (in_type == QCMS_DATA_GRAY_8) { |
michael@0 | 1369 | transform->transform_fn = qcms_transform_data_gray_out_lut; |
michael@0 | 1370 | } else { |
michael@0 | 1371 | transform->transform_fn = qcms_transform_data_graya_out_lut; |
michael@0 | 1372 | } |
michael@0 | 1373 | } |
michael@0 | 1374 | } else { |
michael@0 | 1375 | assert(0 && "unexpected colorspace"); |
michael@0 | 1376 | transform_free(transform); |
michael@0 | 1377 | return NULL; |
michael@0 | 1378 | } |
michael@0 | 1379 | return transform; |
michael@0 | 1380 | } |
michael@0 | 1381 | |
michael@0 | 1382 | #if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) |
michael@0 | 1383 | /* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */ |
michael@0 | 1384 | __attribute__((__force_align_arg_pointer__)) |
michael@0 | 1385 | #endif |
michael@0 | 1386 | void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length) |
michael@0 | 1387 | { |
michael@0 | 1388 | transform->transform_fn(transform, src, dest, length); |
michael@0 | 1389 | } |
michael@0 | 1390 | |
michael@0 | 1391 | qcms_bool qcms_supports_iccv4; |
michael@0 | 1392 | void qcms_enable_iccv4() |
michael@0 | 1393 | { |
michael@0 | 1394 | qcms_supports_iccv4 = true; |
michael@0 | 1395 | } |