gfx/cairo/libpixman/src/pixman-combine32.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/cairo/libpixman/src/pixman-combine32.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,2504 @@
     1.4 +/*
     1.5 + * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
     1.6 + *             2005 Lars Knoll & Zack Rusin, Trolltech
     1.7 + *
     1.8 + * Permission to use, copy, modify, distribute, and sell this software and its
     1.9 + * documentation for any purpose is hereby granted without fee, provided that
    1.10 + * the above copyright notice appear in all copies and that both that
    1.11 + * copyright notice and this permission notice appear in supporting
    1.12 + * documentation, and that the name of Keith Packard not be used in
    1.13 + * advertising or publicity pertaining to distribution of the software without
    1.14 + * specific, written prior permission.  Keith Packard makes no
    1.15 + * representations about the suitability of this software for any purpose.  It
    1.16 + * is provided "as is" without express or implied warranty.
    1.17 + *
    1.18 + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
    1.19 + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
    1.20 + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
    1.21 + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    1.22 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
    1.23 + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
    1.24 + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
    1.25 + * SOFTWARE.
    1.26 + */
    1.27 +#ifdef HAVE_CONFIG_H
    1.28 +#include <config.h>
    1.29 +#endif
    1.30 +
    1.31 +#include <math.h>
    1.32 +#include <string.h>
    1.33 +
    1.34 +#include "pixman-private.h"
    1.35 +#include "pixman-combine32.h"
    1.36 +
    1.37 +/* component alpha helper functions */
    1.38 +
    1.39 +static void
    1.40 +combine_mask_ca (uint32_t *src, uint32_t *mask)
    1.41 +{
    1.42 +    uint32_t a = *mask;
    1.43 +
    1.44 +    uint32_t x;
    1.45 +    uint16_t xa;
    1.46 +
    1.47 +    if (!a)
    1.48 +    {
    1.49 +	*(src) = 0;
    1.50 +	return;
    1.51 +    }
    1.52 +
    1.53 +    x = *(src);
    1.54 +    if (a == ~0)
    1.55 +    {
    1.56 +	x = x >> A_SHIFT;
    1.57 +	x |= x << G_SHIFT;
    1.58 +	x |= x << R_SHIFT;
    1.59 +	*(mask) = x;
    1.60 +	return;
    1.61 +    }
    1.62 +
    1.63 +    xa = x >> A_SHIFT;
    1.64 +    UN8x4_MUL_UN8x4 (x, a);
    1.65 +    *(src) = x;
    1.66 +    
    1.67 +    UN8x4_MUL_UN8 (a, xa);
    1.68 +    *(mask) = a;
    1.69 +}
    1.70 +
    1.71 +static void
    1.72 +combine_mask_value_ca (uint32_t *src, const uint32_t *mask)
    1.73 +{
    1.74 +    uint32_t a = *mask;
    1.75 +    uint32_t x;
    1.76 +
    1.77 +    if (!a)
    1.78 +    {
    1.79 +	*(src) = 0;
    1.80 +	return;
    1.81 +    }
    1.82 +
    1.83 +    if (a == ~0)
    1.84 +	return;
    1.85 +
    1.86 +    x = *(src);
    1.87 +    UN8x4_MUL_UN8x4 (x, a);
    1.88 +    *(src) = x;
    1.89 +}
    1.90 +
    1.91 +static void
    1.92 +combine_mask_alpha_ca (const uint32_t *src, uint32_t *mask)
    1.93 +{
    1.94 +    uint32_t a = *(mask);
    1.95 +    uint32_t x;
    1.96 +
    1.97 +    if (!a)
    1.98 +	return;
    1.99 +
   1.100 +    x = *(src) >> A_SHIFT;
   1.101 +    if (x == MASK)
   1.102 +	return;
   1.103 +
   1.104 +    if (a == ~0)
   1.105 +    {
   1.106 +	x |= x << G_SHIFT;
   1.107 +	x |= x << R_SHIFT;
   1.108 +	*(mask) = x;
   1.109 +	return;
   1.110 +    }
   1.111 +
   1.112 +    UN8x4_MUL_UN8 (a, x);
   1.113 +    *(mask) = a;
   1.114 +}
   1.115 +
   1.116 +/*
   1.117 + * There are two ways of handling alpha -- either as a single unified value or
   1.118 + * a separate value for each component, hence each macro must have two
   1.119 + * versions.  The unified alpha version has a 'u' at the end of the name,
   1.120 + * the component version has a 'ca'.  Similarly, functions which deal with
   1.121 + * this difference will have two versions using the same convention.
   1.122 + */
   1.123 +
   1.124 +static force_inline uint32_t
   1.125 +combine_mask (const uint32_t *src, const uint32_t *mask, int i)
   1.126 +{
   1.127 +    uint32_t s, m;
   1.128 +
   1.129 +    if (mask)
   1.130 +    {
   1.131 +	m = *(mask + i) >> A_SHIFT;
   1.132 +
   1.133 +	if (!m)
   1.134 +	    return 0;
   1.135 +    }
   1.136 +
   1.137 +    s = *(src + i);
   1.138 +
   1.139 +    if (mask)
   1.140 +	UN8x4_MUL_UN8 (s, m);
   1.141 +
   1.142 +    return s;
   1.143 +}
   1.144 +
   1.145 +static void
   1.146 +combine_clear (pixman_implementation_t *imp,
   1.147 +               pixman_op_t              op,
   1.148 +               uint32_t *                dest,
   1.149 +               const uint32_t *          src,
   1.150 +               const uint32_t *          mask,
   1.151 +               int                      width)
   1.152 +{
   1.153 +    memset (dest, 0, width * sizeof(uint32_t));
   1.154 +}
   1.155 +
   1.156 +static void
   1.157 +combine_dst (pixman_implementation_t *imp,
   1.158 +	     pixman_op_t	      op,
   1.159 +	     uint32_t *		      dest,
   1.160 +	     const uint32_t *	      src,
   1.161 +	     const uint32_t *          mask,
   1.162 +	     int		      width)
   1.163 +{
   1.164 +    return;
   1.165 +}
   1.166 +
   1.167 +static void
   1.168 +combine_src_u (pixman_implementation_t *imp,
   1.169 +               pixman_op_t              op,
   1.170 +               uint32_t *                dest,
   1.171 +               const uint32_t *          src,
   1.172 +               const uint32_t *          mask,
   1.173 +               int                      width)
   1.174 +{
   1.175 +    int i;
   1.176 +
   1.177 +    if (!mask)
   1.178 +    {
   1.179 +	memcpy (dest, src, width * sizeof (uint32_t));
   1.180 +    }
   1.181 +    else
   1.182 +    {
   1.183 +	for (i = 0; i < width; ++i)
   1.184 +	{
   1.185 +	    uint32_t s = combine_mask (src, mask, i);
   1.186 +
   1.187 +	    *(dest + i) = s;
   1.188 +	}
   1.189 +    }
   1.190 +}
   1.191 +
   1.192 +static void
   1.193 +combine_over_u (pixman_implementation_t *imp,
   1.194 +                pixman_op_t              op,
   1.195 +                uint32_t *                dest,
   1.196 +                const uint32_t *          src,
   1.197 +                const uint32_t *          mask,
   1.198 +                int                      width)
   1.199 +{
   1.200 +    int i;
   1.201 +
   1.202 +    if (!mask)
   1.203 +    {
   1.204 +	for (i = 0; i < width; ++i)
   1.205 +	{
   1.206 +	    uint32_t s = *(src + i);
   1.207 +	    uint32_t a = ALPHA_8 (s);
   1.208 +	    if (a == 0xFF)
   1.209 +	    {
   1.210 +		*(dest + i) = s;
   1.211 +	    }
   1.212 +	    else if (s)
   1.213 +	    {
   1.214 +		uint32_t d = *(dest + i);
   1.215 +		uint32_t ia = a ^ 0xFF;
   1.216 +		UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
   1.217 +		*(dest + i) = d;
   1.218 +	    }
   1.219 +	}
   1.220 +    }
   1.221 +    else
   1.222 +    {
   1.223 +	for (i = 0; i < width; ++i)
   1.224 +	{
   1.225 +	    uint32_t m = ALPHA_8 (*(mask + i));
   1.226 +	    if (m == 0xFF)
   1.227 +	    {
   1.228 +		uint32_t s = *(src + i);
   1.229 +		uint32_t a = ALPHA_8 (s);
   1.230 +		if (a == 0xFF)
   1.231 +		{
   1.232 +		    *(dest + i) = s;
   1.233 +		}
   1.234 +		else if (s)
   1.235 +		{
   1.236 +		    uint32_t d = *(dest + i);
   1.237 +		    uint32_t ia = a ^ 0xFF;
   1.238 +		    UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
   1.239 +		    *(dest + i) = d;
   1.240 +		}
   1.241 +	    }
   1.242 +	    else if (m)
   1.243 +	    {
   1.244 +		uint32_t s = *(src + i);
   1.245 +		if (s)
   1.246 +		{
   1.247 +		    uint32_t d = *(dest + i);
   1.248 +		    UN8x4_MUL_UN8 (s, m);
   1.249 +		    UN8x4_MUL_UN8_ADD_UN8x4 (d, ALPHA_8 (~s), s);
   1.250 +		    *(dest + i) = d;
   1.251 +		}
   1.252 +	    }
   1.253 +	}
   1.254 +    }
   1.255 +}
   1.256 +
   1.257 +static void
   1.258 +combine_over_reverse_u (pixman_implementation_t *imp,
   1.259 +                        pixman_op_t              op,
   1.260 +                        uint32_t *                dest,
   1.261 +                        const uint32_t *          src,
   1.262 +                        const uint32_t *          mask,
   1.263 +                        int                      width)
   1.264 +{
   1.265 +    int i;
   1.266 +
   1.267 +    for (i = 0; i < width; ++i)
   1.268 +    {
   1.269 +	uint32_t s = combine_mask (src, mask, i);
   1.270 +	uint32_t d = *(dest + i);
   1.271 +	uint32_t ia = ALPHA_8 (~*(dest + i));
   1.272 +	UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
   1.273 +	*(dest + i) = s;
   1.274 +    }
   1.275 +}
   1.276 +
   1.277 +static void
   1.278 +combine_in_u (pixman_implementation_t *imp,
   1.279 +              pixman_op_t              op,
   1.280 +              uint32_t *                dest,
   1.281 +              const uint32_t *          src,
   1.282 +              const uint32_t *          mask,
   1.283 +              int                      width)
   1.284 +{
   1.285 +    int i;
   1.286 +
   1.287 +    for (i = 0; i < width; ++i)
   1.288 +    {
   1.289 +	uint32_t s = combine_mask (src, mask, i);
   1.290 +	uint32_t a = ALPHA_8 (*(dest + i));
   1.291 +	UN8x4_MUL_UN8 (s, a);
   1.292 +	*(dest + i) = s;
   1.293 +    }
   1.294 +}
   1.295 +
   1.296 +static void
   1.297 +combine_in_reverse_u (pixman_implementation_t *imp,
   1.298 +                      pixman_op_t              op,
   1.299 +                      uint32_t *                dest,
   1.300 +                      const uint32_t *          src,
   1.301 +                      const uint32_t *          mask,
   1.302 +                      int                      width)
   1.303 +{
   1.304 +    int i;
   1.305 +
   1.306 +    for (i = 0; i < width; ++i)
   1.307 +    {
   1.308 +	uint32_t s = combine_mask (src, mask, i);
   1.309 +	uint32_t d = *(dest + i);
   1.310 +	uint32_t a = ALPHA_8 (s);
   1.311 +	UN8x4_MUL_UN8 (d, a);
   1.312 +	*(dest + i) = d;
   1.313 +    }
   1.314 +}
   1.315 +
   1.316 +static void
   1.317 +combine_out_u (pixman_implementation_t *imp,
   1.318 +               pixman_op_t              op,
   1.319 +               uint32_t *                dest,
   1.320 +               const uint32_t *          src,
   1.321 +               const uint32_t *          mask,
   1.322 +               int                      width)
   1.323 +{
   1.324 +    int i;
   1.325 +
   1.326 +    for (i = 0; i < width; ++i)
   1.327 +    {
   1.328 +	uint32_t s = combine_mask (src, mask, i);
   1.329 +	uint32_t a = ALPHA_8 (~*(dest + i));
   1.330 +	UN8x4_MUL_UN8 (s, a);
   1.331 +	*(dest + i) = s;
   1.332 +    }
   1.333 +}
   1.334 +
   1.335 +static void
   1.336 +combine_out_reverse_u (pixman_implementation_t *imp,
   1.337 +                       pixman_op_t              op,
   1.338 +                       uint32_t *                dest,
   1.339 +                       const uint32_t *          src,
   1.340 +                       const uint32_t *          mask,
   1.341 +                       int                      width)
   1.342 +{
   1.343 +    int i;
   1.344 +
   1.345 +    for (i = 0; i < width; ++i)
   1.346 +    {
   1.347 +	uint32_t s = combine_mask (src, mask, i);
   1.348 +	uint32_t d = *(dest + i);
   1.349 +	uint32_t a = ALPHA_8 (~s);
   1.350 +	UN8x4_MUL_UN8 (d, a);
   1.351 +	*(dest + i) = d;
   1.352 +    }
   1.353 +}
   1.354 +
   1.355 +static void
   1.356 +combine_atop_u (pixman_implementation_t *imp,
   1.357 +                pixman_op_t              op,
   1.358 +                uint32_t *                dest,
   1.359 +                const uint32_t *          src,
   1.360 +                const uint32_t *          mask,
   1.361 +                int                      width)
   1.362 +{
   1.363 +    int i;
   1.364 +
   1.365 +    for (i = 0; i < width; ++i)
   1.366 +    {
   1.367 +	uint32_t s = combine_mask (src, mask, i);
   1.368 +	uint32_t d = *(dest + i);
   1.369 +	uint32_t dest_a = ALPHA_8 (d);
   1.370 +	uint32_t src_ia = ALPHA_8 (~s);
   1.371 +
   1.372 +	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
   1.373 +	*(dest + i) = s;
   1.374 +    }
   1.375 +}
   1.376 +
   1.377 +static void
   1.378 +combine_atop_reverse_u (pixman_implementation_t *imp,
   1.379 +                        pixman_op_t              op,
   1.380 +                        uint32_t *                dest,
   1.381 +                        const uint32_t *          src,
   1.382 +                        const uint32_t *          mask,
   1.383 +                        int                      width)
   1.384 +{
   1.385 +    int i;
   1.386 +
   1.387 +    for (i = 0; i < width; ++i)
   1.388 +    {
   1.389 +	uint32_t s = combine_mask (src, mask, i);
   1.390 +	uint32_t d = *(dest + i);
   1.391 +	uint32_t src_a = ALPHA_8 (s);
   1.392 +	uint32_t dest_ia = ALPHA_8 (~d);
   1.393 +
   1.394 +	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
   1.395 +	*(dest + i) = s;
   1.396 +    }
   1.397 +}
   1.398 +
   1.399 +static void
   1.400 +combine_xor_u (pixman_implementation_t *imp,
   1.401 +               pixman_op_t              op,
   1.402 +               uint32_t *                dest,
   1.403 +               const uint32_t *          src,
   1.404 +               const uint32_t *          mask,
   1.405 +               int                      width)
   1.406 +{
   1.407 +    int i;
   1.408 +
   1.409 +    for (i = 0; i < width; ++i)
   1.410 +    {
   1.411 +	uint32_t s = combine_mask (src, mask, i);
   1.412 +	uint32_t d = *(dest + i);
   1.413 +	uint32_t src_ia = ALPHA_8 (~s);
   1.414 +	uint32_t dest_ia = ALPHA_8 (~d);
   1.415 +
   1.416 +	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
   1.417 +	*(dest + i) = s;
   1.418 +    }
   1.419 +}
   1.420 +
   1.421 +static void
   1.422 +combine_add_u (pixman_implementation_t *imp,
   1.423 +               pixman_op_t              op,
   1.424 +               uint32_t *                dest,
   1.425 +               const uint32_t *          src,
   1.426 +               const uint32_t *          mask,
   1.427 +               int                      width)
   1.428 +{
   1.429 +    int i;
   1.430 +
   1.431 +    for (i = 0; i < width; ++i)
   1.432 +    {
   1.433 +	uint32_t s = combine_mask (src, mask, i);
   1.434 +	uint32_t d = *(dest + i);
   1.435 +	UN8x4_ADD_UN8x4 (d, s);
   1.436 +	*(dest + i) = d;
   1.437 +    }
   1.438 +}
   1.439 +
   1.440 +static void
   1.441 +combine_saturate_u (pixman_implementation_t *imp,
   1.442 +                    pixman_op_t              op,
   1.443 +                    uint32_t *                dest,
   1.444 +                    const uint32_t *          src,
   1.445 +                    const uint32_t *          mask,
   1.446 +                    int                      width)
   1.447 +{
   1.448 +    int i;
   1.449 +
   1.450 +    for (i = 0; i < width; ++i)
   1.451 +    {
   1.452 +	uint32_t s = combine_mask (src, mask, i);
   1.453 +	uint32_t d = *(dest + i);
   1.454 +	uint16_t sa, da;
   1.455 +
   1.456 +	sa = s >> A_SHIFT;
   1.457 +	da = ~d >> A_SHIFT;
   1.458 +	if (sa > da)
   1.459 +	{
   1.460 +	    sa = DIV_UN8 (da, sa);
   1.461 +	    UN8x4_MUL_UN8 (s, sa);
   1.462 +	}
   1.463 +	;
   1.464 +	UN8x4_ADD_UN8x4 (d, s);
   1.465 +	*(dest + i) = d;
   1.466 +    }
   1.467 +}
   1.468 +
   1.469 +/*
   1.470 + * PDF blend modes:
   1.471 + * The following blend modes have been taken from the PDF ISO 32000
   1.472 + * specification, which at this point in time is available from
   1.473 + * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf
   1.474 + * The relevant chapters are 11.3.5 and 11.3.6.
   1.475 + * The formula for computing the final pixel color given in 11.3.6 is:
   1.476 + * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
   1.477 + * with B() being the blend function.
   1.478 + * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs
   1.479 + *
   1.480 + * These blend modes should match the SVG filter draft specification, as
   1.481 + * it has been designed to mirror ISO 32000. Note that at the current point
   1.482 + * no released draft exists that shows this, as the formulas have not been
   1.483 + * updated yet after the release of ISO 32000.
   1.484 + *
   1.485 + * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and
   1.486 + * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an
   1.487 + * argument. Note that this implementation operates on premultiplied colors,
   1.488 + * while the PDF specification does not. Therefore the code uses the formula
   1.489 + * Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as)
   1.490 + */
   1.491 +
   1.492 +/*
   1.493 + * Multiply
   1.494 + * B(Dca, ad, Sca, as) = Dca.Sca
   1.495 + */
   1.496 +static void
   1.497 +combine_multiply_u (pixman_implementation_t *imp,
   1.498 +                    pixman_op_t              op,
   1.499 +                    uint32_t *                dest,
   1.500 +                    const uint32_t *          src,
   1.501 +                    const uint32_t *          mask,
   1.502 +                    int                      width)
   1.503 +{
   1.504 +    int i;
   1.505 +
   1.506 +    for (i = 0; i < width; ++i)
   1.507 +    {
   1.508 +	uint32_t s = combine_mask (src, mask, i);
   1.509 +	uint32_t d = *(dest + i);
   1.510 +	uint32_t ss = s;
   1.511 +	uint32_t src_ia = ALPHA_8 (~s);
   1.512 +	uint32_t dest_ia = ALPHA_8 (~d);
   1.513 +
   1.514 +	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (ss, dest_ia, d, src_ia);
   1.515 +	UN8x4_MUL_UN8x4 (d, s);
   1.516 +	UN8x4_ADD_UN8x4 (d, ss);
   1.517 +
   1.518 +	*(dest + i) = d;
   1.519 +    }
   1.520 +}
   1.521 +
   1.522 +static void
   1.523 +combine_multiply_ca (pixman_implementation_t *imp,
   1.524 +                     pixman_op_t              op,
   1.525 +                     uint32_t *                dest,
   1.526 +                     const uint32_t *          src,
   1.527 +                     const uint32_t *          mask,
   1.528 +                     int                      width)
   1.529 +{
   1.530 +    int i;
   1.531 +
   1.532 +    for (i = 0; i < width; ++i)
   1.533 +    {
   1.534 +	uint32_t m = *(mask + i);
   1.535 +	uint32_t s = *(src + i);
   1.536 +	uint32_t d = *(dest + i);
   1.537 +	uint32_t r = d;
   1.538 +	uint32_t dest_ia = ALPHA_8 (~d);
   1.539 +
   1.540 +	combine_mask_ca (&s, &m);
   1.541 +
   1.542 +	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (r, ~m, s, dest_ia);
   1.543 +	UN8x4_MUL_UN8x4 (d, s);
   1.544 +	UN8x4_ADD_UN8x4 (r, d);
   1.545 +
   1.546 +	*(dest + i) = r;
   1.547 +    }
   1.548 +}
   1.549 +
   1.550 +#define PDF_SEPARABLE_BLEND_MODE(name)					\
   1.551 +    static void								\
   1.552 +    combine_ ## name ## _u (pixman_implementation_t *imp,		\
   1.553 +			    pixman_op_t              op,		\
   1.554 +                            uint32_t *                dest,		\
   1.555 +			    const uint32_t *          src,		\
   1.556 +			    const uint32_t *          mask,		\
   1.557 +			    int                      width)		\
   1.558 +    {									\
   1.559 +	int i;								\
   1.560 +	for (i = 0; i < width; ++i) {					\
   1.561 +	    uint32_t s = combine_mask (src, mask, i);			\
   1.562 +	    uint32_t d = *(dest + i);					\
   1.563 +	    uint8_t sa = ALPHA_8 (s);					\
   1.564 +	    uint8_t isa = ~sa;						\
   1.565 +	    uint8_t da = ALPHA_8 (d);					\
   1.566 +	    uint8_t ida = ~da;						\
   1.567 +	    uint32_t result;						\
   1.568 +									\
   1.569 +	    result = d;							\
   1.570 +	    UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (result, isa, s, ida);	\
   1.571 +	    								\
   1.572 +	    *(dest + i) = result +					\
   1.573 +		(DIV_ONE_UN8 (sa * (uint32_t)da) << A_SHIFT) +		\
   1.574 +		(blend_ ## name (RED_8 (d), da, RED_8 (s), sa) << R_SHIFT) + \
   1.575 +		(blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), sa) << G_SHIFT) + \
   1.576 +		(blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), sa));	\
   1.577 +	}								\
   1.578 +    }									\
   1.579 +    									\
   1.580 +    static void								\
   1.581 +    combine_ ## name ## _ca (pixman_implementation_t *imp,		\
   1.582 +			     pixman_op_t              op,		\
   1.583 +                             uint32_t *                dest,		\
   1.584 +			     const uint32_t *          src,		\
   1.585 +			     const uint32_t *          mask,		\
   1.586 +			     int                     width)		\
   1.587 +    {									\
   1.588 +	int i;								\
   1.589 +	for (i = 0; i < width; ++i) {					\
   1.590 +	    uint32_t m = *(mask + i);					\
   1.591 +	    uint32_t s = *(src + i);					\
   1.592 +	    uint32_t d = *(dest + i);					\
   1.593 +	    uint8_t da = ALPHA_8 (d);					\
   1.594 +	    uint8_t ida = ~da;						\
   1.595 +	    uint32_t result;						\
   1.596 +            								\
   1.597 +	    combine_mask_ca (&s, &m);					\
   1.598 +            								\
   1.599 +	    result = d;							\
   1.600 +	    UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (result, ~m, s, ida);     \
   1.601 +            								\
   1.602 +	    result +=							\
   1.603 +	        (DIV_ONE_UN8 (ALPHA_8 (m) * (uint32_t)da) << A_SHIFT) +	\
   1.604 +	        (blend_ ## name (RED_8 (d), da, RED_8 (s), RED_8 (m)) << R_SHIFT) + \
   1.605 +	        (blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), GREEN_8 (m)) << G_SHIFT) + \
   1.606 +	        (blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), BLUE_8 (m))); \
   1.607 +	    								\
   1.608 +	    *(dest + i) = result;					\
   1.609 +	}								\
   1.610 +    }
   1.611 +
   1.612 +/*
   1.613 + * Screen
   1.614 + * B(Dca, ad, Sca, as) = Dca.sa + Sca.da - Dca.Sca
   1.615 + */
   1.616 +static inline uint32_t
   1.617 +blend_screen (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
   1.618 +{
   1.619 +    return DIV_ONE_UN8 (sca * da + dca * sa - sca * dca);
   1.620 +}
   1.621 +
   1.622 +PDF_SEPARABLE_BLEND_MODE (screen)
   1.623 +
   1.624 +/*
   1.625 + * Overlay
   1.626 + * B(Dca, Da, Sca, Sa) =
   1.627 + *   if 2.Dca < Da
   1.628 + *     2.Sca.Dca
   1.629 + *   otherwise
   1.630 + *     Sa.Da - 2.(Da - Dca).(Sa - Sca)
   1.631 + */
   1.632 +static inline uint32_t
   1.633 +blend_overlay (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
   1.634 +{
   1.635 +    uint32_t rca;
   1.636 +
   1.637 +    if (2 * dca < da)
   1.638 +	rca = 2 * sca * dca;
   1.639 +    else
   1.640 +	rca = sa * da - 2 * (da - dca) * (sa - sca);
   1.641 +    return DIV_ONE_UN8 (rca);
   1.642 +}
   1.643 +
   1.644 +PDF_SEPARABLE_BLEND_MODE (overlay)
   1.645 +
   1.646 +/*
   1.647 + * Darken
   1.648 + * B(Dca, Da, Sca, Sa) = min (Sca.Da, Dca.Sa)
   1.649 + */
   1.650 +static inline uint32_t
   1.651 +blend_darken (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
   1.652 +{
   1.653 +    uint32_t s, d;
   1.654 +
   1.655 +    s = sca * da;
   1.656 +    d = dca * sa;
   1.657 +    return DIV_ONE_UN8 (s > d ? d : s);
   1.658 +}
   1.659 +
   1.660 +PDF_SEPARABLE_BLEND_MODE (darken)
   1.661 +
   1.662 +/*
   1.663 + * Lighten
   1.664 + * B(Dca, Da, Sca, Sa) = max (Sca.Da, Dca.Sa)
   1.665 + */
   1.666 +static inline uint32_t
   1.667 +blend_lighten (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
   1.668 +{
   1.669 +    uint32_t s, d;
   1.670 +
   1.671 +    s = sca * da;
   1.672 +    d = dca * sa;
   1.673 +    return DIV_ONE_UN8 (s > d ? s : d);
   1.674 +}
   1.675 +
   1.676 +PDF_SEPARABLE_BLEND_MODE (lighten)
   1.677 +
   1.678 +/*
   1.679 + * Color dodge
   1.680 + * B(Dca, Da, Sca, Sa) =
   1.681 + *   if Dca == 0
   1.682 + *     0
   1.683 + *   if Sca == Sa
   1.684 + *     Sa.Da
   1.685 + *   otherwise
   1.686 + *     Sa.Da. min (1, Dca / Da / (1 - Sca/Sa))
   1.687 + */
   1.688 +static inline uint32_t
   1.689 +blend_color_dodge (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
   1.690 +{
   1.691 +    if (sca >= sa)
   1.692 +    {
   1.693 +	return dca == 0 ? 0 : DIV_ONE_UN8 (sa * da);
   1.694 +    }
   1.695 +    else
   1.696 +    {
   1.697 +	uint32_t rca = dca * sa / (sa - sca);
   1.698 +	return DIV_ONE_UN8 (sa * MIN (rca, da));
   1.699 +    }
   1.700 +}
   1.701 +
   1.702 +PDF_SEPARABLE_BLEND_MODE (color_dodge)
   1.703 +
   1.704 +/*
   1.705 + * Color burn
   1.706 + * B(Dca, Da, Sca, Sa) =
   1.707 + *   if Dca == Da
   1.708 + *     Sa.Da
   1.709 + *   if Sca == 0
   1.710 + *     0
   1.711 + *   otherwise
   1.712 + *     Sa.Da.(1 - min (1, (1 - Dca/Da).Sa / Sca))
   1.713 + */
   1.714 +static inline uint32_t
   1.715 +blend_color_burn (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
   1.716 +{
   1.717 +    if (sca == 0)
   1.718 +    {
   1.719 +	return dca < da ? 0 : DIV_ONE_UN8 (sa * da);
   1.720 +    }
   1.721 +    else
   1.722 +    {
   1.723 +	uint32_t rca = (da - dca) * sa / sca;
   1.724 +	return DIV_ONE_UN8 (sa * (MAX (rca, da) - rca));
   1.725 +    }
   1.726 +}
   1.727 +
   1.728 +PDF_SEPARABLE_BLEND_MODE (color_burn)
   1.729 +
   1.730 +/*
   1.731 + * Hard light
   1.732 + * B(Dca, Da, Sca, Sa) =
   1.733 + *   if 2.Sca < Sa
   1.734 + *     2.Sca.Dca
   1.735 + *   otherwise
   1.736 + *     Sa.Da - 2.(Da - Dca).(Sa - Sca)
   1.737 + */
   1.738 +static inline uint32_t
   1.739 +blend_hard_light (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
   1.740 +{
   1.741 +    if (2 * sca < sa)
   1.742 +	return DIV_ONE_UN8 (2 * sca * dca);
   1.743 +    else
   1.744 +	return DIV_ONE_UN8 (sa * da - 2 * (da - dca) * (sa - sca));
   1.745 +}
   1.746 +
   1.747 +PDF_SEPARABLE_BLEND_MODE (hard_light)
   1.748 +
   1.749 +/*
   1.750 + * Soft light
   1.751 + * B(Dca, Da, Sca, Sa) =
   1.752 + *   if (2.Sca <= Sa)
   1.753 + *     Dca.(Sa - (1 - Dca/Da).(2.Sca - Sa))
   1.754 + *   otherwise if Dca.4 <= Da
   1.755 + *     Dca.(Sa + (2.Sca - Sa).((16.Dca/Da - 12).Dca/Da + 3)
   1.756 + *   otherwise
   1.757 + *     (Dca.Sa + (SQRT (Dca/Da).Da - Dca).(2.Sca - Sa))
   1.758 + */
   1.759 +static inline uint32_t
   1.760 +blend_soft_light (uint32_t dca_org,
   1.761 +		  uint32_t da_org,
   1.762 +		  uint32_t sca_org,
   1.763 +		  uint32_t sa_org)
   1.764 +{
   1.765 +    double dca = dca_org * (1.0 / MASK);
   1.766 +    double da = da_org * (1.0 / MASK);
   1.767 +    double sca = sca_org * (1.0 / MASK);
   1.768 +    double sa = sa_org * (1.0 / MASK);
   1.769 +    double rca;
   1.770 +
   1.771 +    if (2 * sca < sa)
   1.772 +    {
   1.773 +	if (da == 0)
   1.774 +	    rca = dca * sa;
   1.775 +	else
   1.776 +	    rca = dca * sa - dca * (da - dca) * (sa - 2 * sca) / da;
   1.777 +    }
   1.778 +    else if (da == 0)
   1.779 +    {
   1.780 +	rca = 0;
   1.781 +    }
   1.782 +    else if (4 * dca <= da)
   1.783 +    {
   1.784 +	rca = dca * sa +
   1.785 +	    (2 * sca - sa) * dca * ((16 * dca / da - 12) * dca / da + 3);
   1.786 +    }
   1.787 +    else
   1.788 +    {
   1.789 +	rca = dca * sa + (sqrt (dca * da) - dca) * (2 * sca - sa);
   1.790 +    }
   1.791 +    return rca * MASK + 0.5;
   1.792 +}
   1.793 +
   1.794 +PDF_SEPARABLE_BLEND_MODE (soft_light)
   1.795 +
   1.796 +/*
   1.797 + * Difference
   1.798 + * B(Dca, Da, Sca, Sa) = abs (Dca.Sa - Sca.Da)
   1.799 + */
   1.800 +static inline uint32_t
   1.801 +blend_difference (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
   1.802 +{
   1.803 +    uint32_t dcasa = dca * sa;
   1.804 +    uint32_t scada = sca * da;
   1.805 +
   1.806 +    if (scada < dcasa)
   1.807 +	return DIV_ONE_UN8 (dcasa - scada);
   1.808 +    else
   1.809 +	return DIV_ONE_UN8 (scada - dcasa);
   1.810 +}
   1.811 +
   1.812 +PDF_SEPARABLE_BLEND_MODE (difference)
   1.813 +
   1.814 +/*
   1.815 + * Exclusion
   1.816 + * B(Dca, Da, Sca, Sa) = (Sca.Da + Dca.Sa - 2.Sca.Dca)
   1.817 + */
   1.818 +
   1.819 +/* This can be made faster by writing it directly and not using
   1.820 + * PDF_SEPARABLE_BLEND_MODE, but that's a performance optimization */
   1.821 +
   1.822 +static inline uint32_t
   1.823 +blend_exclusion (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
   1.824 +{
   1.825 +    return DIV_ONE_UN8 (sca * da + dca * sa - 2 * dca * sca);
   1.826 +}
   1.827 +
   1.828 +PDF_SEPARABLE_BLEND_MODE (exclusion)
   1.829 +
   1.830 +#undef PDF_SEPARABLE_BLEND_MODE
   1.831 +
   1.832 +/*
   1.833 + * PDF non-separable blend modes are implemented using the following functions
   1.834 + * to operate in HSL space, with Cmax, Cmid, Cmin referring to the max, mid
   1.835 + * and min value of the red, green and blue components.
   1.836 + *
   1.837 + * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue
   1.838 + *
   1.839 + * clip_color (C):
   1.840 + *   l = LUM (C)
   1.841 + *   min = Cmin
   1.842 + *   max = Cmax
   1.843 + *   if min < 0.0
   1.844 + *     C = l + ( ( ( C – l ) × l ) ⁄ ( l – min ) )
   1.845 + *   if max > 1.0
   1.846 + *     C = l + ( ( ( C – l ) × ( 1 – l ) ) ⁄ ( max – l ) )
   1.847 + *   return C
   1.848 + *
   1.849 + * set_lum (C, l):
   1.850 + *   d = l – LUM (C)
   1.851 + *   C += d
   1.852 + *   return clip_color (C)
   1.853 + *
   1.854 + * SAT (C) = CH_MAX (C) - CH_MIN (C)
   1.855 + *
   1.856 + * set_sat (C, s):
   1.857 + *  if Cmax > Cmin
   1.858 + *    Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) )
   1.859 + *    Cmax = s
   1.860 + *  else
   1.861 + *    Cmid = Cmax = 0.0
   1.862 + *  Cmin = 0.0
   1.863 + *  return C
   1.864 + */
   1.865 +
   1.866 +/* For premultiplied colors, we need to know what happens when C is
   1.867 + * multiplied by a real number. LUM and SAT are linear:
   1.868 + *
   1.869 + *    LUM (r × C) = r × LUM (C)		SAT (r * C) = r * SAT (C)
   1.870 + *
   1.871 + * If we extend clip_color with an extra argument a and change
   1.872 + *
   1.873 + *        if max > 1.0
   1.874 + *
   1.875 + * into
   1.876 + *
   1.877 + *        if max > a
   1.878 + *
   1.879 + * then clip_color is also linear:
   1.880 + *
   1.881 + *    r * clip_color (C, a) = clip_color (r * C, r * a);
   1.882 + *
   1.883 + * for positive r.
   1.884 + *
   1.885 + * Similarly, we can extend set_lum with an extra argument that is just passed
   1.886 + * on to clip_color:
   1.887 + *
   1.888 + *   r * set_lum ( C, l, a)
   1.889 + *
   1.890 + *   = r × clip_color ( C + l - LUM (C), a)
   1.891 + *
   1.892 + *   = clip_color ( r * C + r × l - r * LUM (C), r * a)
   1.893 + *
   1.894 + *   = set_lum ( r * C, r * l, r * a)
   1.895 + *
   1.896 + * Finally, set_sat:
   1.897 + *
   1.898 + *    r * set_sat (C, s) = set_sat (x * C, r * s)
   1.899 + *
   1.900 + * The above holds for all non-zero x, because the x's in the fraction for
   1.901 + * C_mid cancel out. Specifically, it holds for x = r:
   1.902 + *
   1.903 + *    r * set_sat (C, s) = set_sat (r * C, r * s)
   1.904 + *
   1.905 + */
   1.906 +
   1.907 +/* So, for the non-separable PDF blend modes, we have (using s, d for
   1.908 + * non-premultiplied colors, and S, D for premultiplied):
   1.909 + *
   1.910 + *   Color:
   1.911 + *
   1.912 + *     a_s * a_d * B(s, d)
   1.913 + *   = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1)
   1.914 + *   = set_lum (S * a_d, a_s * LUM (D), a_s * a_d)
   1.915 + *
   1.916 + *
   1.917 + *   Luminosity:
   1.918 + *
   1.919 + *     a_s * a_d * B(s, d)
   1.920 + *   = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1)
   1.921 + *   = set_lum (a_s * D, a_d * LUM(S), a_s * a_d)
   1.922 + *
   1.923 + *
   1.924 + *   Saturation:
   1.925 + *
   1.926 + *     a_s * a_d * B(s, d)
   1.927 + *   = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1)
   1.928 + *   = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)),
   1.929 + *                                        a_s * LUM (D), a_s * a_d)
    1.930 + *   = set_lum (set_sat (a_s * D, a_d * SAT (S)), a_s * LUM (D), a_s * a_d)
   1.931 + *
   1.932 + *   Hue:
   1.933 + *
   1.934 + *     a_s * a_d * B(s, d)
   1.935 + *   = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1)
   1.936 + *   = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d)
   1.937 + *
   1.938 + */
   1.939 +
   1.940 +#define CH_MIN(c) (c[0] < c[1] ? (c[0] < c[2] ? c[0] : c[2]) : (c[1] < c[2] ? c[1] : c[2]))
   1.941 +#define CH_MAX(c) (c[0] > c[1] ? (c[0] > c[2] ? c[0] : c[2]) : (c[1] > c[2] ? c[1] : c[2]))
   1.942 +#define LUM(c) ((c[0] * 30 + c[1] * 59 + c[2] * 11) / 100)
   1.943 +#define SAT(c) (CH_MAX (c) - CH_MIN (c))
   1.944 +
   1.945 +#define PDF_NON_SEPARABLE_BLEND_MODE(name)				\
   1.946 +    static void								\
   1.947 +    combine_ ## name ## _u (pixman_implementation_t *imp,		\
   1.948 +			    pixman_op_t op,				\
   1.949 +                            uint32_t *dest,				\
   1.950 +			    const uint32_t *src,				\
   1.951 +			    const uint32_t *mask,			\
   1.952 +			    int width)					\
   1.953 +    {									\
   1.954 +	int i;								\
   1.955 +	for (i = 0; i < width; ++i)					\
   1.956 +	{								\
   1.957 +	    uint32_t s = combine_mask (src, mask, i);			\
   1.958 +	    uint32_t d = *(dest + i);					\
   1.959 +	    uint8_t sa = ALPHA_8 (s);					\
   1.960 +	    uint8_t isa = ~sa;						\
   1.961 +	    uint8_t da = ALPHA_8 (d);					\
   1.962 +	    uint8_t ida = ~da;						\
   1.963 +	    uint32_t result;						\
   1.964 +	    uint32_t sc[3], dc[3], c[3];					\
   1.965 +            								\
   1.966 +	    result = d;							\
   1.967 +	    UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (result, isa, s, ida);	\
   1.968 +	    dc[0] = RED_8 (d);						\
   1.969 +	    sc[0] = RED_8 (s);						\
   1.970 +	    dc[1] = GREEN_8 (d);					\
   1.971 +	    sc[1] = GREEN_8 (s);					\
   1.972 +	    dc[2] = BLUE_8 (d);						\
   1.973 +	    sc[2] = BLUE_8 (s);						\
   1.974 +	    blend_ ## name (c, dc, da, sc, sa);				\
   1.975 +            								\
   1.976 +	    *(dest + i) = result +					\
   1.977 +		(DIV_ONE_UN8 (sa * (uint32_t)da) << A_SHIFT) +		\
   1.978 +		(DIV_ONE_UN8 (c[0]) << R_SHIFT) +			\
   1.979 +		(DIV_ONE_UN8 (c[1]) << G_SHIFT) +			\
   1.980 +		(DIV_ONE_UN8 (c[2]));					\
   1.981 +	}								\
   1.982 +    }
   1.983 +
   1.984 +static void
   1.985 +set_lum (uint32_t dest[3], uint32_t src[3], uint32_t sa, uint32_t lum)
   1.986 +{
   1.987 +    double a, l, min, max;
   1.988 +    double tmp[3];
   1.989 +
   1.990 +    a = sa * (1.0 / MASK);
   1.991 +
   1.992 +    l = lum * (1.0 / MASK);
   1.993 +    tmp[0] = src[0] * (1.0 / MASK);
   1.994 +    tmp[1] = src[1] * (1.0 / MASK);
   1.995 +    tmp[2] = src[2] * (1.0 / MASK);
   1.996 +
   1.997 +    l = l - LUM (tmp);
   1.998 +    tmp[0] += l;
   1.999 +    tmp[1] += l;
  1.1000 +    tmp[2] += l;
  1.1001 +
  1.1002 +    /* clip_color */
  1.1003 +    l = LUM (tmp);
  1.1004 +    min = CH_MIN (tmp);
  1.1005 +    max = CH_MAX (tmp);
  1.1006 +
  1.1007 +    if (min < 0)
  1.1008 +    {
  1.1009 +	if (l - min == 0.0)
  1.1010 +	{
  1.1011 +	    tmp[0] = 0;
  1.1012 +	    tmp[1] = 0;
  1.1013 +	    tmp[2] = 0;
  1.1014 +	}
  1.1015 +	else
  1.1016 +	{
  1.1017 +	    tmp[0] = l + (tmp[0] - l) * l / (l - min);
  1.1018 +	    tmp[1] = l + (tmp[1] - l) * l / (l - min);
  1.1019 +	    tmp[2] = l + (tmp[2] - l) * l / (l - min);
  1.1020 +	}
  1.1021 +    }
  1.1022 +    if (max > a)
  1.1023 +    {
  1.1024 +	if (max - l == 0.0)
  1.1025 +	{
  1.1026 +	    tmp[0] = a;
  1.1027 +	    tmp[1] = a;
  1.1028 +	    tmp[2] = a;
  1.1029 +	}
  1.1030 +	else
  1.1031 +	{
  1.1032 +	    tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l);
  1.1033 +	    tmp[1] = l + (tmp[1] - l) * (a - l) / (max - l);
  1.1034 +	    tmp[2] = l + (tmp[2] - l) * (a - l) / (max - l);
  1.1035 +	}
  1.1036 +    }
  1.1037 +
  1.1038 +    dest[0] = tmp[0] * MASK + 0.5;
  1.1039 +    dest[1] = tmp[1] * MASK + 0.5;
  1.1040 +    dest[2] = tmp[2] * MASK + 0.5;
  1.1041 +}
  1.1042 +
  1.1043 +static void
  1.1044 +set_sat (uint32_t dest[3], uint32_t src[3], uint32_t sat)
  1.1045 +{
  1.1046 +    int id[3];
  1.1047 +    uint32_t min, max;
  1.1048 +
  1.1049 +    if (src[0] > src[1])
  1.1050 +    {
  1.1051 +	if (src[0] > src[2])
  1.1052 +	{
  1.1053 +	    id[0] = 0;
  1.1054 +	    if (src[1] > src[2])
  1.1055 +	    {
  1.1056 +		id[1] = 1;
  1.1057 +		id[2] = 2;
  1.1058 +	    }
  1.1059 +	    else
  1.1060 +	    {
  1.1061 +		id[1] = 2;
  1.1062 +		id[2] = 1;
  1.1063 +	    }
  1.1064 +	}
  1.1065 +	else
  1.1066 +	{
  1.1067 +	    id[0] = 2;
  1.1068 +	    id[1] = 0;
  1.1069 +	    id[2] = 1;
  1.1070 +	}
  1.1071 +    }
  1.1072 +    else
  1.1073 +    {
  1.1074 +	if (src[0] > src[2])
  1.1075 +	{
  1.1076 +	    id[0] = 1;
  1.1077 +	    id[1] = 0;
  1.1078 +	    id[2] = 2;
  1.1079 +	}
  1.1080 +	else
  1.1081 +	{
  1.1082 +	    id[2] = 0;
  1.1083 +	    if (src[1] > src[2])
  1.1084 +	    {
  1.1085 +		id[0] = 1;
  1.1086 +		id[1] = 2;
  1.1087 +	    }
  1.1088 +	    else
  1.1089 +	    {
  1.1090 +		id[0] = 2;
  1.1091 +		id[1] = 1;
  1.1092 +	    }
  1.1093 +	}
  1.1094 +    }
  1.1095 +
  1.1096 +    max = dest[id[0]];
  1.1097 +    min = dest[id[2]];
  1.1098 +    if (max > min)
  1.1099 +    {
  1.1100 +	dest[id[1]] = (dest[id[1]] - min) * sat / (max - min);
  1.1101 +	dest[id[0]] = sat;
  1.1102 +	dest[id[2]] = 0;
  1.1103 +    }
  1.1104 +    else
  1.1105 +    {
  1.1106 +	dest[0] = dest[1] = dest[2] = 0;
  1.1107 +    }
  1.1108 +}
  1.1109 +
  1.1110 +/*
  1.1111 + * Hue:
  1.1112 + * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb))
  1.1113 + */
  1.1114 +static inline void
  1.1115 +blend_hsl_hue (uint32_t c[3],
  1.1116 +               uint32_t dc[3],
  1.1117 +               uint32_t da,
  1.1118 +               uint32_t sc[3],
  1.1119 +               uint32_t sa)
  1.1120 +{
  1.1121 +    c[0] = sc[0] * da;
  1.1122 +    c[1] = sc[1] * da;
  1.1123 +    c[2] = sc[2] * da;
  1.1124 +    set_sat (c, c, SAT (dc) * sa);
  1.1125 +    set_lum (c, c, sa * da, LUM (dc) * sa);
  1.1126 +}
  1.1127 +
  1.1128 +PDF_NON_SEPARABLE_BLEND_MODE (hsl_hue)
  1.1129 +
  1.1130 +/*
  1.1131 + * Saturation:
  1.1132 + * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb))
  1.1133 + */
  1.1134 +static inline void
  1.1135 +blend_hsl_saturation (uint32_t c[3],
  1.1136 +                      uint32_t dc[3],
  1.1137 +                      uint32_t da,
  1.1138 +                      uint32_t sc[3],
  1.1139 +                      uint32_t sa)
  1.1140 +{
  1.1141 +    c[0] = dc[0] * sa;
  1.1142 +    c[1] = dc[1] * sa;
  1.1143 +    c[2] = dc[2] * sa;
  1.1144 +    set_sat (c, c, SAT (sc) * da);
  1.1145 +    set_lum (c, c, sa * da, LUM (dc) * sa);
  1.1146 +}
  1.1147 +
  1.1148 +PDF_NON_SEPARABLE_BLEND_MODE (hsl_saturation)
  1.1149 +
  1.1150 +/*
  1.1151 + * Color:
  1.1152 + * B(Cb, Cs) = set_lum (Cs, LUM (Cb))
  1.1153 + */
  1.1154 +static inline void
  1.1155 +blend_hsl_color (uint32_t c[3],
  1.1156 +                 uint32_t dc[3],
  1.1157 +                 uint32_t da,
  1.1158 +                 uint32_t sc[3],
  1.1159 +                 uint32_t sa)
  1.1160 +{
  1.1161 +    c[0] = sc[0] * da;
  1.1162 +    c[1] = sc[1] * da;
  1.1163 +    c[2] = sc[2] * da;
  1.1164 +    set_lum (c, c, sa * da, LUM (dc) * sa);
  1.1165 +}
  1.1166 +
  1.1167 +PDF_NON_SEPARABLE_BLEND_MODE (hsl_color)
  1.1168 +
  1.1169 +/*
  1.1170 + * Luminosity:
  1.1171 + * B(Cb, Cs) = set_lum (Cb, LUM (Cs))
  1.1172 + */
  1.1173 +static inline void
  1.1174 +blend_hsl_luminosity (uint32_t c[3],
  1.1175 +                      uint32_t dc[3],
  1.1176 +                      uint32_t da,
  1.1177 +                      uint32_t sc[3],
  1.1178 +                      uint32_t sa)
  1.1179 +{
  1.1180 +    c[0] = dc[0] * sa;
  1.1181 +    c[1] = dc[1] * sa;
  1.1182 +    c[2] = dc[2] * sa;
  1.1183 +    set_lum (c, c, sa * da, LUM (sc) * da);
  1.1184 +}
  1.1185 +
  1.1186 +PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity)
  1.1187 +
  1.1188 +#undef SAT
  1.1189 +#undef LUM
  1.1190 +#undef CH_MAX
  1.1191 +#undef CH_MIN
  1.1192 +#undef PDF_NON_SEPARABLE_BLEND_MODE
  1.1193 +
  1.1194 +/* All of the disjoint/conjoint composing functions
  1.1195 + *
  1.1196 + * The four entries in the first column indicate what source contributions
  1.1197 + * come from each of the four areas of the picture -- areas covered by neither
  1.1198 + * A nor B, areas covered only by A, areas covered only by B and finally
  1.1199 + * areas covered by both A and B.
  1.1200 + * 
  1.1201 + * Disjoint			Conjoint
  1.1202 + * Fa		Fb		Fa		Fb
  1.1203 + * (0,0,0,0)	0		0		0		0
  1.1204 + * (0,A,0,A)	1		0		1		0
  1.1205 + * (0,0,B,B)	0		1		0		1
  1.1206 + * (0,A,B,A)	1		min((1-a)/b,1)	1		max(1-a/b,0)
  1.1207 + * (0,A,B,B)	min((1-b)/a,1)	1		max(1-b/a,0)	1
  1.1208 + * (0,0,0,A)	max(1-(1-b)/a,0) 0		min(1,b/a)	0
  1.1209 + * (0,0,0,B)	0		max(1-(1-a)/b,0) 0		min(a/b,1)
  1.1210 + * (0,A,0,0)	min(1,(1-b)/a)	0		max(1-b/a,0)	0
  1.1211 + * (0,0,B,0)	0		min(1,(1-a)/b)	0		max(1-a/b,0)
  1.1212 + * (0,0,B,A)	max(1-(1-b)/a,0) min(1,(1-a)/b)	 min(1,b/a)	max(1-a/b,0)
  1.1213 + * (0,A,0,B)	min(1,(1-b)/a)	max(1-(1-a)/b,0) max(1-b/a,0)	min(1,a/b)
  1.1214 + * (0,A,B,0)	min(1,(1-b)/a)	min(1,(1-a)/b)	max(1-b/a,0)	max(1-a/b,0)
  1.1215 + *
  1.1216 + * See  http://marc.info/?l=xfree-render&m=99792000027857&w=2  for more
  1.1217 + * information about these operators.
  1.1218 + */
  1.1219 +
  1.1220 +#define COMBINE_A_OUT 1
  1.1221 +#define COMBINE_A_IN  2
  1.1222 +#define COMBINE_B_OUT 4
  1.1223 +#define COMBINE_B_IN  8
  1.1224 +
  1.1225 +#define COMBINE_CLEAR   0
  1.1226 +#define COMBINE_A       (COMBINE_A_OUT | COMBINE_A_IN)
  1.1227 +#define COMBINE_B       (COMBINE_B_OUT | COMBINE_B_IN)
  1.1228 +#define COMBINE_A_OVER  (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_A_IN)
  1.1229 +#define COMBINE_B_OVER  (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_B_IN)
  1.1230 +#define COMBINE_A_ATOP  (COMBINE_B_OUT | COMBINE_A_IN)
  1.1231 +#define COMBINE_B_ATOP  (COMBINE_A_OUT | COMBINE_B_IN)
  1.1232 +#define COMBINE_XOR     (COMBINE_A_OUT | COMBINE_B_OUT)
  1.1233 +
  1.1234 +/* portion covered by a but not b */
  1.1235 +static uint8_t
  1.1236 +combine_disjoint_out_part (uint8_t a, uint8_t b)
  1.1237 +{
  1.1238 +    /* min (1, (1-b) / a) */
  1.1239 +
  1.1240 +    b = ~b;                 /* 1 - b */
  1.1241 +    if (b >= a)             /* 1 - b >= a -> (1-b)/a >= 1 */
  1.1242 +	return MASK;        /* 1 */
  1.1243 +    return DIV_UN8 (b, a);     /* (1-b) / a */
  1.1244 +}
  1.1245 +
  1.1246 +/* portion covered by both a and b */
  1.1247 +static uint8_t
  1.1248 +combine_disjoint_in_part (uint8_t a, uint8_t b)
  1.1249 +{
  1.1250 +    /* max (1-(1-b)/a,0) */
  1.1251 +    /*  = - min ((1-b)/a - 1, 0) */
  1.1252 +    /*  = 1 - min (1, (1-b)/a) */
  1.1253 +
  1.1254 +    b = ~b;                 /* 1 - b */
  1.1255 +    if (b >= a)             /* 1 - b >= a -> (1-b)/a >= 1 */
  1.1256 +	return 0;           /* 1 - 1 */
  1.1257 +    return ~DIV_UN8(b, a);    /* 1 - (1-b) / a */
  1.1258 +}
  1.1259 +
  1.1260 +/* portion covered by a but not b */
  1.1261 +static uint8_t
  1.1262 +combine_conjoint_out_part (uint8_t a, uint8_t b)
  1.1263 +{
  1.1264 +    /* max (1-b/a,0) */
  1.1265 +    /* = 1-min(b/a,1) */
  1.1266 +
  1.1267 +    /* min (1, (1-b) / a) */
  1.1268 +
  1.1269 +    if (b >= a)             /* b >= a -> b/a >= 1 */
  1.1270 +	return 0x00;        /* 0 */
  1.1271 +    return ~DIV_UN8(b, a);    /* 1 - b/a */
  1.1272 +}
  1.1273 +
  1.1274 +/* portion covered by both a and b */
  1.1275 +static uint8_t
  1.1276 +combine_conjoint_in_part (uint8_t a, uint8_t b)
  1.1277 +{
  1.1278 +    /* min (1,b/a) */
  1.1279 +
  1.1280 +    if (b >= a)             /* b >= a -> b/a >= 1 */
  1.1281 +	return MASK;        /* 1 */
  1.1282 +    return DIV_UN8 (b, a);     /* b/a */
  1.1283 +}
  1.1284 +
  1.1285 +#define GET_COMP(v, i)   ((uint16_t) (uint8_t) ((v) >> i))
  1.1286 +
  1.1287 +#define ADD(x, y, i, t)							\
  1.1288 +    ((t) = GET_COMP (x, i) + GET_COMP (y, i),				\
  1.1289 +     (uint32_t) ((uint8_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i))
  1.1290 +
  1.1291 +#define GENERIC(x, y, i, ax, ay, t, u, v)				\
  1.1292 +    ((t) = (MUL_UN8 (GET_COMP (y, i), ay, (u)) +			\
  1.1293 +            MUL_UN8 (GET_COMP (x, i), ax, (v))),			\
  1.1294 +     (uint32_t) ((uint8_t) ((t) |					\
  1.1295 +                           (0 - ((t) >> G_SHIFT)))) << (i))
  1.1296 +
  1.1297 +static void
  1.1298 +combine_disjoint_general_u (uint32_t *      dest,
  1.1299 +                            const uint32_t *src,
  1.1300 +                            const uint32_t *mask,
  1.1301 +                            int            width,
  1.1302 +                            uint8_t        combine)
  1.1303 +{
  1.1304 +    int i;
  1.1305 +
  1.1306 +    for (i = 0; i < width; ++i)
  1.1307 +    {
  1.1308 +	uint32_t s = combine_mask (src, mask, i);
  1.1309 +	uint32_t d = *(dest + i);
  1.1310 +	uint32_t m, n, o, p;
  1.1311 +	uint16_t Fa, Fb, t, u, v;
  1.1312 +	uint8_t sa = s >> A_SHIFT;
  1.1313 +	uint8_t da = d >> A_SHIFT;
  1.1314 +
  1.1315 +	switch (combine & COMBINE_A)
  1.1316 +	{
  1.1317 +	default:
  1.1318 +	    Fa = 0;
  1.1319 +	    break;
  1.1320 +
  1.1321 +	case COMBINE_A_OUT:
  1.1322 +	    Fa = combine_disjoint_out_part (sa, da);
  1.1323 +	    break;
  1.1324 +
  1.1325 +	case COMBINE_A_IN:
  1.1326 +	    Fa = combine_disjoint_in_part (sa, da);
  1.1327 +	    break;
  1.1328 +
  1.1329 +	case COMBINE_A:
  1.1330 +	    Fa = MASK;
  1.1331 +	    break;
  1.1332 +	}
  1.1333 +
  1.1334 +	switch (combine & COMBINE_B)
  1.1335 +	{
  1.1336 +	default:
  1.1337 +	    Fb = 0;
  1.1338 +	    break;
  1.1339 +
  1.1340 +	case COMBINE_B_OUT:
  1.1341 +	    Fb = combine_disjoint_out_part (da, sa);
  1.1342 +	    break;
  1.1343 +
  1.1344 +	case COMBINE_B_IN:
  1.1345 +	    Fb = combine_disjoint_in_part (da, sa);
  1.1346 +	    break;
  1.1347 +
  1.1348 +	case COMBINE_B:
  1.1349 +	    Fb = MASK;
  1.1350 +	    break;
  1.1351 +	}
  1.1352 +	m = GENERIC (s, d, 0, Fa, Fb, t, u, v);
  1.1353 +	n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v);
  1.1354 +	o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v);
  1.1355 +	p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v);
  1.1356 +	s = m | n | o | p;
  1.1357 +	*(dest + i) = s;
  1.1358 +    }
  1.1359 +}
  1.1360 +
  1.1361 +static void
  1.1362 +combine_disjoint_over_u (pixman_implementation_t *imp,
  1.1363 +                         pixman_op_t              op,
  1.1364 +                         uint32_t *                dest,
  1.1365 +                         const uint32_t *          src,
  1.1366 +                         const uint32_t *          mask,
  1.1367 +                         int                      width)
  1.1368 +{
  1.1369 +    int i;
  1.1370 +
  1.1371 +    for (i = 0; i < width; ++i)
  1.1372 +    {
  1.1373 +	uint32_t s = combine_mask (src, mask, i);
  1.1374 +	uint16_t a = s >> A_SHIFT;
  1.1375 +
  1.1376 +	if (s != 0x00)
  1.1377 +	{
  1.1378 +	    uint32_t d = *(dest + i);
  1.1379 +	    a = combine_disjoint_out_part (d >> A_SHIFT, a);
  1.1380 +	    UN8x4_MUL_UN8_ADD_UN8x4 (d, a, s);
  1.1381 +
  1.1382 +	    *(dest + i) = d;
  1.1383 +	}
  1.1384 +    }
  1.1385 +}
  1.1386 +
  1.1387 +static void
  1.1388 +combine_disjoint_in_u (pixman_implementation_t *imp,
  1.1389 +                       pixman_op_t              op,
  1.1390 +                       uint32_t *                dest,
  1.1391 +                       const uint32_t *          src,
  1.1392 +                       const uint32_t *          mask,
  1.1393 +                       int                      width)
  1.1394 +{
  1.1395 +    combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
  1.1396 +}
  1.1397 +
  1.1398 +static void
  1.1399 +combine_disjoint_in_reverse_u (pixman_implementation_t *imp,
  1.1400 +                               pixman_op_t              op,
  1.1401 +                               uint32_t *                dest,
  1.1402 +                               const uint32_t *          src,
  1.1403 +                               const uint32_t *          mask,
  1.1404 +                               int                      width)
  1.1405 +{
  1.1406 +    combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
  1.1407 +}
  1.1408 +
  1.1409 +static void
  1.1410 +combine_disjoint_out_u (pixman_implementation_t *imp,
  1.1411 +                        pixman_op_t              op,
  1.1412 +                        uint32_t *                dest,
  1.1413 +                        const uint32_t *          src,
  1.1414 +                        const uint32_t *          mask,
  1.1415 +                        int                      width)
  1.1416 +{
  1.1417 +    combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
  1.1418 +}
  1.1419 +
  1.1420 +static void
  1.1421 +combine_disjoint_out_reverse_u (pixman_implementation_t *imp,
  1.1422 +                                pixman_op_t              op,
  1.1423 +                                uint32_t *                dest,
  1.1424 +                                const uint32_t *          src,
  1.1425 +                                const uint32_t *          mask,
  1.1426 +                                int                      width)
  1.1427 +{
  1.1428 +    combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
  1.1429 +}
  1.1430 +
  1.1431 +static void
  1.1432 +combine_disjoint_atop_u (pixman_implementation_t *imp,
  1.1433 +                         pixman_op_t              op,
  1.1434 +                         uint32_t *                dest,
  1.1435 +                         const uint32_t *          src,
  1.1436 +                         const uint32_t *          mask,
  1.1437 +                         int                      width)
  1.1438 +{
  1.1439 +    combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
  1.1440 +}
  1.1441 +
  1.1442 +static void
  1.1443 +combine_disjoint_atop_reverse_u (pixman_implementation_t *imp,
  1.1444 +                                 pixman_op_t              op,
  1.1445 +                                 uint32_t *                dest,
  1.1446 +                                 const uint32_t *          src,
  1.1447 +                                 const uint32_t *          mask,
  1.1448 +                                 int                      width)
  1.1449 +{
  1.1450 +    combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
  1.1451 +}
  1.1452 +
  1.1453 +static void
  1.1454 +combine_disjoint_xor_u (pixman_implementation_t *imp,
  1.1455 +                        pixman_op_t              op,
  1.1456 +                        uint32_t *                dest,
  1.1457 +                        const uint32_t *          src,
  1.1458 +                        const uint32_t *          mask,
  1.1459 +                        int                      width)
  1.1460 +{
  1.1461 +    combine_disjoint_general_u (dest, src, mask, width, COMBINE_XOR);
  1.1462 +}
  1.1463 +
  1.1464 +static void
  1.1465 +combine_conjoint_general_u (uint32_t *      dest,
  1.1466 +                            const uint32_t *src,
  1.1467 +                            const uint32_t *mask,
  1.1468 +                            int            width,
  1.1469 +                            uint8_t        combine)
  1.1470 +{
  1.1471 +    int i;
  1.1472 +
  1.1473 +    for (i = 0; i < width; ++i)
  1.1474 +    {
  1.1475 +	uint32_t s = combine_mask (src, mask, i);
  1.1476 +	uint32_t d = *(dest + i);
  1.1477 +	uint32_t m, n, o, p;
  1.1478 +	uint16_t Fa, Fb, t, u, v;
  1.1479 +	uint8_t sa = s >> A_SHIFT;
  1.1480 +	uint8_t da = d >> A_SHIFT;
  1.1481 +
  1.1482 +	switch (combine & COMBINE_A)
  1.1483 +	{
  1.1484 +	default:
  1.1485 +	    Fa = 0;
  1.1486 +	    break;
  1.1487 +
  1.1488 +	case COMBINE_A_OUT:
  1.1489 +	    Fa = combine_conjoint_out_part (sa, da);
  1.1490 +	    break;
  1.1491 +
  1.1492 +	case COMBINE_A_IN:
  1.1493 +	    Fa = combine_conjoint_in_part (sa, da);
  1.1494 +	    break;
  1.1495 +
  1.1496 +	case COMBINE_A:
  1.1497 +	    Fa = MASK;
  1.1498 +	    break;
  1.1499 +	}
  1.1500 +
  1.1501 +	switch (combine & COMBINE_B)
  1.1502 +	{
  1.1503 +	default:
  1.1504 +	    Fb = 0;
  1.1505 +	    break;
  1.1506 +
  1.1507 +	case COMBINE_B_OUT:
  1.1508 +	    Fb = combine_conjoint_out_part (da, sa);
  1.1509 +	    break;
  1.1510 +
  1.1511 +	case COMBINE_B_IN:
  1.1512 +	    Fb = combine_conjoint_in_part (da, sa);
  1.1513 +	    break;
  1.1514 +
  1.1515 +	case COMBINE_B:
  1.1516 +	    Fb = MASK;
  1.1517 +	    break;
  1.1518 +	}
  1.1519 +
  1.1520 +	m = GENERIC (s, d, 0, Fa, Fb, t, u, v);
  1.1521 +	n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v);
  1.1522 +	o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v);
  1.1523 +	p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v);
  1.1524 +
  1.1525 +	s = m | n | o | p;
  1.1526 +
  1.1527 +	*(dest + i) = s;
  1.1528 +    }
  1.1529 +}
  1.1530 +
  1.1531 +static void
  1.1532 +combine_conjoint_over_u (pixman_implementation_t *imp,
  1.1533 +                         pixman_op_t              op,
  1.1534 +                         uint32_t *                dest,
  1.1535 +                         const uint32_t *          src,
  1.1536 +                         const uint32_t *          mask,
  1.1537 +                         int                      width)
  1.1538 +{
  1.1539 +    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OVER);
  1.1540 +}
  1.1541 +
  1.1542 +static void
  1.1543 +combine_conjoint_over_reverse_u (pixman_implementation_t *imp,
  1.1544 +                                 pixman_op_t              op,
  1.1545 +                                 uint32_t *                dest,
  1.1546 +                                 const uint32_t *          src,
  1.1547 +                                 const uint32_t *          mask,
  1.1548 +                                 int                      width)
  1.1549 +{
  1.1550 +    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OVER);
  1.1551 +}
  1.1552 +
  1.1553 +static void
  1.1554 +combine_conjoint_in_u (pixman_implementation_t *imp,
  1.1555 +                       pixman_op_t              op,
  1.1556 +                       uint32_t *                dest,
  1.1557 +                       const uint32_t *          src,
  1.1558 +                       const uint32_t *          mask,
  1.1559 +                       int                      width)
  1.1560 +{
  1.1561 +    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
  1.1562 +}
  1.1563 +
  1.1564 +static void
  1.1565 +combine_conjoint_in_reverse_u (pixman_implementation_t *imp,
  1.1566 +                               pixman_op_t              op,
  1.1567 +                               uint32_t *                dest,
  1.1568 +                               const uint32_t *          src,
  1.1569 +                               const uint32_t *          mask,
  1.1570 +                               int                      width)
  1.1571 +{
  1.1572 +    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
  1.1573 +}
  1.1574 +
  1.1575 +static void
  1.1576 +combine_conjoint_out_u (pixman_implementation_t *imp,
  1.1577 +                        pixman_op_t              op,
  1.1578 +                        uint32_t *                dest,
  1.1579 +                        const uint32_t *          src,
  1.1580 +                        const uint32_t *          mask,
  1.1581 +                        int                      width)
  1.1582 +{
  1.1583 +    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
  1.1584 +}
  1.1585 +
  1.1586 +static void
  1.1587 +combine_conjoint_out_reverse_u (pixman_implementation_t *imp,
  1.1588 +                                pixman_op_t              op,
  1.1589 +                                uint32_t *                dest,
  1.1590 +                                const uint32_t *          src,
  1.1591 +                                const uint32_t *          mask,
  1.1592 +                                int                      width)
  1.1593 +{
  1.1594 +    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
  1.1595 +}
  1.1596 +
  1.1597 +static void
  1.1598 +combine_conjoint_atop_u (pixman_implementation_t *imp,
  1.1599 +                         pixman_op_t              op,
  1.1600 +                         uint32_t *                dest,
  1.1601 +                         const uint32_t *          src,
  1.1602 +                         const uint32_t *          mask,
  1.1603 +                         int                      width)
  1.1604 +{
  1.1605 +    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
  1.1606 +}
  1.1607 +
  1.1608 +static void
  1.1609 +combine_conjoint_atop_reverse_u (pixman_implementation_t *imp,
  1.1610 +                                 pixman_op_t              op,
  1.1611 +                                 uint32_t *                dest,
  1.1612 +                                 const uint32_t *          src,
  1.1613 +                                 const uint32_t *          mask,
  1.1614 +                                 int                      width)
  1.1615 +{
  1.1616 +    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
  1.1617 +}
  1.1618 +
  1.1619 +static void
  1.1620 +combine_conjoint_xor_u (pixman_implementation_t *imp,
  1.1621 +                        pixman_op_t              op,
  1.1622 +                        uint32_t *                dest,
  1.1623 +                        const uint32_t *          src,
  1.1624 +                        const uint32_t *          mask,
  1.1625 +                        int                      width)
  1.1626 +{
  1.1627 +    combine_conjoint_general_u (dest, src, mask, width, COMBINE_XOR);
  1.1628 +}
  1.1629 +
  1.1630 +
  1.1631 +/* Component alpha combiners */
  1.1632 +
  1.1633 +static void
  1.1634 +combine_clear_ca (pixman_implementation_t *imp,
  1.1635 +                  pixman_op_t              op,
  1.1636 +                  uint32_t *                dest,
  1.1637 +                  const uint32_t *          src,
  1.1638 +                  const uint32_t *          mask,
  1.1639 +                  int                      width)
  1.1640 +{
  1.1641 +    memset (dest, 0, width * sizeof(uint32_t));
  1.1642 +}
  1.1643 +
  1.1644 +static void
  1.1645 +combine_src_ca (pixman_implementation_t *imp,
  1.1646 +                pixman_op_t              op,
  1.1647 +                uint32_t *                dest,
  1.1648 +                const uint32_t *          src,
  1.1649 +                const uint32_t *          mask,
  1.1650 +                int                      width)
  1.1651 +{
  1.1652 +    int i;
  1.1653 +
  1.1654 +    for (i = 0; i < width; ++i)
  1.1655 +    {
  1.1656 +	uint32_t s = *(src + i);
  1.1657 +	uint32_t m = *(mask + i);
  1.1658 +
  1.1659 +	combine_mask_value_ca (&s, &m);
  1.1660 +
  1.1661 +	*(dest + i) = s;
  1.1662 +    }
  1.1663 +}
  1.1664 +
  1.1665 +static void
  1.1666 +combine_over_ca (pixman_implementation_t *imp,
  1.1667 +                 pixman_op_t              op,
  1.1668 +                 uint32_t *                dest,
  1.1669 +                 const uint32_t *          src,
  1.1670 +                 const uint32_t *          mask,
  1.1671 +                 int                      width)
  1.1672 +{
  1.1673 +    int i;
  1.1674 +
  1.1675 +    for (i = 0; i < width; ++i)
  1.1676 +    {
  1.1677 +	uint32_t s = *(src + i);
  1.1678 +	uint32_t m = *(mask + i);
  1.1679 +	uint32_t a;
  1.1680 +
  1.1681 +	combine_mask_ca (&s, &m);
  1.1682 +
  1.1683 +	a = ~m;
  1.1684 +	if (a)
  1.1685 +	{
  1.1686 +	    uint32_t d = *(dest + i);
  1.1687 +	    UN8x4_MUL_UN8x4_ADD_UN8x4 (d, a, s);
  1.1688 +	    s = d;
  1.1689 +	}
  1.1690 +
  1.1691 +	*(dest + i) = s;
  1.1692 +    }
  1.1693 +}
  1.1694 +
  1.1695 +static void
  1.1696 +combine_over_reverse_ca (pixman_implementation_t *imp,
  1.1697 +                         pixman_op_t              op,
  1.1698 +                         uint32_t *                dest,
  1.1699 +                         const uint32_t *          src,
  1.1700 +                         const uint32_t *          mask,
  1.1701 +                         int                      width)
  1.1702 +{
  1.1703 +    int i;
  1.1704 +
  1.1705 +    for (i = 0; i < width; ++i)
  1.1706 +    {
  1.1707 +	uint32_t d = *(dest + i);
  1.1708 +	uint32_t a = ~d >> A_SHIFT;
  1.1709 +
  1.1710 +	if (a)
  1.1711 +	{
  1.1712 +	    uint32_t s = *(src + i);
  1.1713 +	    uint32_t m = *(mask + i);
  1.1714 +
  1.1715 +	    UN8x4_MUL_UN8x4 (s, m);
  1.1716 +	    UN8x4_MUL_UN8_ADD_UN8x4 (s, a, d);
  1.1717 +
  1.1718 +	    *(dest + i) = s;
  1.1719 +	}
  1.1720 +    }
  1.1721 +}
  1.1722 +
  1.1723 +static void
  1.1724 +combine_in_ca (pixman_implementation_t *imp,
  1.1725 +               pixman_op_t              op,
  1.1726 +               uint32_t *                dest,
  1.1727 +               const uint32_t *          src,
  1.1728 +               const uint32_t *          mask,
  1.1729 +               int                      width)
  1.1730 +{
  1.1731 +    int i;
  1.1732 +
  1.1733 +    for (i = 0; i < width; ++i)
  1.1734 +    {
  1.1735 +	uint32_t d = *(dest + i);
  1.1736 +	uint16_t a = d >> A_SHIFT;
  1.1737 +	uint32_t s = 0;
  1.1738 +
  1.1739 +	if (a)
  1.1740 +	{
  1.1741 +	    uint32_t m = *(mask + i);
  1.1742 +
  1.1743 +	    s = *(src + i);
  1.1744 +	    combine_mask_value_ca (&s, &m);
  1.1745 +
  1.1746 +	    if (a != MASK)
  1.1747 +		UN8x4_MUL_UN8 (s, a);
  1.1748 +	}
  1.1749 +
  1.1750 +	*(dest + i) = s;
  1.1751 +    }
  1.1752 +}
  1.1753 +
  1.1754 +static void
  1.1755 +combine_in_reverse_ca (pixman_implementation_t *imp,
  1.1756 +                       pixman_op_t              op,
  1.1757 +                       uint32_t *                dest,
  1.1758 +                       const uint32_t *          src,
  1.1759 +                       const uint32_t *          mask,
  1.1760 +                       int                      width)
  1.1761 +{
  1.1762 +    int i;
  1.1763 +
  1.1764 +    for (i = 0; i < width; ++i)
  1.1765 +    {
  1.1766 +	uint32_t s = *(src + i);
  1.1767 +	uint32_t m = *(mask + i);
  1.1768 +	uint32_t a;
  1.1769 +
  1.1770 +	combine_mask_alpha_ca (&s, &m);
  1.1771 +
  1.1772 +	a = m;
  1.1773 +	if (a != ~0)
  1.1774 +	{
  1.1775 +	    uint32_t d = 0;
  1.1776 +
  1.1777 +	    if (a)
  1.1778 +	    {
  1.1779 +		d = *(dest + i);
  1.1780 +		UN8x4_MUL_UN8x4 (d, a);
  1.1781 +	    }
  1.1782 +
  1.1783 +	    *(dest + i) = d;
  1.1784 +	}
  1.1785 +    }
  1.1786 +}
  1.1787 +
  1.1788 +static void
  1.1789 +combine_out_ca (pixman_implementation_t *imp,
  1.1790 +                pixman_op_t              op,
  1.1791 +                uint32_t *                dest,
  1.1792 +                const uint32_t *          src,
  1.1793 +                const uint32_t *          mask,
  1.1794 +                int                      width)
  1.1795 +{
  1.1796 +    int i;
  1.1797 +
  1.1798 +    for (i = 0; i < width; ++i)
  1.1799 +    {
  1.1800 +	uint32_t d = *(dest + i);
  1.1801 +	uint16_t a = ~d >> A_SHIFT;
  1.1802 +	uint32_t s = 0;
  1.1803 +
  1.1804 +	if (a)
  1.1805 +	{
  1.1806 +	    uint32_t m = *(mask + i);
  1.1807 +
  1.1808 +	    s = *(src + i);
  1.1809 +	    combine_mask_value_ca (&s, &m);
  1.1810 +
  1.1811 +	    if (a != MASK)
  1.1812 +		UN8x4_MUL_UN8 (s, a);
  1.1813 +	}
  1.1814 +
  1.1815 +	*(dest + i) = s;
  1.1816 +    }
  1.1817 +}
  1.1818 +
  1.1819 +static void
  1.1820 +combine_out_reverse_ca (pixman_implementation_t *imp,
  1.1821 +                        pixman_op_t              op,
  1.1822 +                        uint32_t *                dest,
  1.1823 +                        const uint32_t *          src,
  1.1824 +                        const uint32_t *          mask,
  1.1825 +                        int                      width)
  1.1826 +{
  1.1827 +    int i;
  1.1828 +
  1.1829 +    for (i = 0; i < width; ++i)
  1.1830 +    {
  1.1831 +	uint32_t s = *(src + i);
  1.1832 +	uint32_t m = *(mask + i);
  1.1833 +	uint32_t a;
  1.1834 +
  1.1835 +	combine_mask_alpha_ca (&s, &m);
  1.1836 +
  1.1837 +	a = ~m;
  1.1838 +	if (a != ~0)
  1.1839 +	{
  1.1840 +	    uint32_t d = 0;
  1.1841 +
  1.1842 +	    if (a)
  1.1843 +	    {
  1.1844 +		d = *(dest + i);
  1.1845 +		UN8x4_MUL_UN8x4 (d, a);
  1.1846 +	    }
  1.1847 +
  1.1848 +	    *(dest + i) = d;
  1.1849 +	}
  1.1850 +    }
  1.1851 +}
  1.1852 +
  1.1853 +static void
  1.1854 +combine_atop_ca (pixman_implementation_t *imp,
  1.1855 +                 pixman_op_t              op,
  1.1856 +                 uint32_t *                dest,
  1.1857 +                 const uint32_t *          src,
  1.1858 +                 const uint32_t *          mask,
  1.1859 +                 int                      width)
  1.1860 +{
  1.1861 +    int i;
  1.1862 +
  1.1863 +    for (i = 0; i < width; ++i)
  1.1864 +    {
  1.1865 +	uint32_t d = *(dest + i);
  1.1866 +	uint32_t s = *(src + i);
  1.1867 +	uint32_t m = *(mask + i);
  1.1868 +	uint32_t ad;
  1.1869 +	uint16_t as = d >> A_SHIFT;
  1.1870 +
  1.1871 +	combine_mask_ca (&s, &m);
  1.1872 +
  1.1873 +	ad = ~m;
  1.1874 +
  1.1875 +	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as);
  1.1876 +
  1.1877 +	*(dest + i) = d;
  1.1878 +    }
  1.1879 +}
  1.1880 +
  1.1881 +static void
  1.1882 +combine_atop_reverse_ca (pixman_implementation_t *imp,
  1.1883 +                         pixman_op_t              op,
  1.1884 +                         uint32_t *                dest,
  1.1885 +                         const uint32_t *          src,
  1.1886 +                         const uint32_t *          mask,
  1.1887 +                         int                      width)
  1.1888 +{
  1.1889 +    int i;
  1.1890 +
  1.1891 +    for (i = 0; i < width; ++i)
  1.1892 +    {
  1.1893 +	uint32_t d = *(dest + i);
  1.1894 +	uint32_t s = *(src + i);
  1.1895 +	uint32_t m = *(mask + i);
  1.1896 +	uint32_t ad;
  1.1897 +	uint16_t as = ~d >> A_SHIFT;
  1.1898 +
  1.1899 +	combine_mask_ca (&s, &m);
  1.1900 +
  1.1901 +	ad = m;
  1.1902 +
  1.1903 +	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as);
  1.1904 +
  1.1905 +	*(dest + i) = d;
  1.1906 +    }
  1.1907 +}
  1.1908 +
  1.1909 +static void
  1.1910 +combine_xor_ca (pixman_implementation_t *imp,
  1.1911 +                pixman_op_t              op,
  1.1912 +                uint32_t *                dest,
  1.1913 +                const uint32_t *          src,
  1.1914 +                const uint32_t *          mask,
  1.1915 +                int                      width)
  1.1916 +{
  1.1917 +    int i;
  1.1918 +
  1.1919 +    for (i = 0; i < width; ++i)
  1.1920 +    {
  1.1921 +	uint32_t d = *(dest + i);
  1.1922 +	uint32_t s = *(src + i);
  1.1923 +	uint32_t m = *(mask + i);
  1.1924 +	uint32_t ad;
  1.1925 +	uint16_t as = ~d >> A_SHIFT;
  1.1926 +
  1.1927 +	combine_mask_ca (&s, &m);
  1.1928 +
  1.1929 +	ad = ~m;
  1.1930 +
  1.1931 +	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as);
  1.1932 +
  1.1933 +	*(dest + i) = d;
  1.1934 +    }
  1.1935 +}
  1.1936 +
  1.1937 +static void
  1.1938 +combine_add_ca (pixman_implementation_t *imp,
  1.1939 +                pixman_op_t              op,
  1.1940 +                uint32_t *                dest,
  1.1941 +                const uint32_t *          src,
  1.1942 +                const uint32_t *          mask,
  1.1943 +                int                      width)
  1.1944 +{
  1.1945 +    int i;
  1.1946 +
  1.1947 +    for (i = 0; i < width; ++i)
  1.1948 +    {
  1.1949 +	uint32_t s = *(src + i);
  1.1950 +	uint32_t m = *(mask + i);
  1.1951 +	uint32_t d = *(dest + i);
  1.1952 +
  1.1953 +	combine_mask_value_ca (&s, &m);
  1.1954 +
  1.1955 +	UN8x4_ADD_UN8x4 (d, s);
  1.1956 +
  1.1957 +	*(dest + i) = d;
  1.1958 +    }
  1.1959 +}
  1.1960 +
  1.1961 +static void
  1.1962 +combine_saturate_ca (pixman_implementation_t *imp,
  1.1963 +                     pixman_op_t              op,
  1.1964 +                     uint32_t *                dest,
  1.1965 +                     const uint32_t *          src,
  1.1966 +                     const uint32_t *          mask,
  1.1967 +                     int                      width)
  1.1968 +{
  1.1969 +    int i;
  1.1970 +
  1.1971 +    for (i = 0; i < width; ++i)
  1.1972 +    {
  1.1973 +	uint32_t s, d;
  1.1974 +	uint16_t sa, sr, sg, sb, da;
  1.1975 +	uint16_t t, u, v;
  1.1976 +	uint32_t m, n, o, p;
  1.1977 +
  1.1978 +	d = *(dest + i);
  1.1979 +	s = *(src + i);
  1.1980 +	m = *(mask + i);
  1.1981 +
  1.1982 +	combine_mask_ca (&s, &m);
  1.1983 +
  1.1984 +	sa = (m >> A_SHIFT);
  1.1985 +	sr = (m >> R_SHIFT) & MASK;
  1.1986 +	sg = (m >> G_SHIFT) & MASK;
  1.1987 +	sb =  m             & MASK;
  1.1988 +	da = ~d >> A_SHIFT;
  1.1989 +
  1.1990 +	if (sb <= da)
  1.1991 +	    m = ADD (s, d, 0, t);
  1.1992 +	else
  1.1993 +	    m = GENERIC (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v);
  1.1994 +
  1.1995 +	if (sg <= da)
  1.1996 +	    n = ADD (s, d, G_SHIFT, t);
  1.1997 +	else
  1.1998 +	    n = GENERIC (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v);
  1.1999 +
  1.2000 +	if (sr <= da)
  1.2001 +	    o = ADD (s, d, R_SHIFT, t);
  1.2002 +	else
  1.2003 +	    o = GENERIC (s, d, R_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v);
  1.2004 +
  1.2005 +	if (sa <= da)
  1.2006 +	    p = ADD (s, d, A_SHIFT, t);
  1.2007 +	else
  1.2008 +	    p = GENERIC (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v);
  1.2009 +
  1.2010 +	*(dest + i) = m | n | o | p;
  1.2011 +    }
  1.2012 +}
  1.2013 +
  1.2014 +static void
  1.2015 +combine_disjoint_general_ca (uint32_t *      dest,
  1.2016 +                             const uint32_t *src,
  1.2017 +                             const uint32_t *mask,
  1.2018 +                             int            width,
  1.2019 +                             uint8_t        combine)
  1.2020 +{
  1.2021 +    int i;
  1.2022 +
  1.2023 +    for (i = 0; i < width; ++i)
  1.2024 +    {
  1.2025 +	uint32_t s, d;
  1.2026 +	uint32_t m, n, o, p;
  1.2027 +	uint32_t Fa, Fb;
  1.2028 +	uint16_t t, u, v;
  1.2029 +	uint32_t sa;
  1.2030 +	uint8_t da;
  1.2031 +
  1.2032 +	s = *(src + i);
  1.2033 +	m = *(mask + i);
  1.2034 +	d = *(dest + i);
  1.2035 +	da = d >> A_SHIFT;
  1.2036 +
  1.2037 +	combine_mask_ca (&s, &m);
  1.2038 +
  1.2039 +	sa = m;
  1.2040 +
  1.2041 +	switch (combine & COMBINE_A)
  1.2042 +	{
  1.2043 +	default:
  1.2044 +	    Fa = 0;
  1.2045 +	    break;
  1.2046 +
  1.2047 +	case COMBINE_A_OUT:
  1.2048 +	    m = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> 0), da);
  1.2049 +	    n = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
  1.2050 +	    o = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
  1.2051 +	    p = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
  1.2052 +	    Fa = m | n | o | p;
  1.2053 +	    break;
  1.2054 +
  1.2055 +	case COMBINE_A_IN:
  1.2056 +	    m = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> 0), da);
  1.2057 +	    n = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
  1.2058 +	    o = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
  1.2059 +	    p = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
  1.2060 +	    Fa = m | n | o | p;
  1.2061 +	    break;
  1.2062 +
  1.2063 +	case COMBINE_A:
  1.2064 +	    Fa = ~0;
  1.2065 +	    break;
  1.2066 +	}
  1.2067 +
  1.2068 +	switch (combine & COMBINE_B)
  1.2069 +	{
  1.2070 +	default:
  1.2071 +	    Fb = 0;
  1.2072 +	    break;
  1.2073 +
  1.2074 +	case COMBINE_B_OUT:
  1.2075 +	    m = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> 0));
  1.2076 +	    n = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
  1.2077 +	    o = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
  1.2078 +	    p = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
  1.2079 +	    Fb = m | n | o | p;
  1.2080 +	    break;
  1.2081 +
  1.2082 +	case COMBINE_B_IN:
  1.2083 +	    m = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> 0));
  1.2084 +	    n = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
  1.2085 +	    o = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
  1.2086 +	    p = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
  1.2087 +	    Fb = m | n | o | p;
  1.2088 +	    break;
  1.2089 +
  1.2090 +	case COMBINE_B:
  1.2091 +	    Fb = ~0;
  1.2092 +	    break;
  1.2093 +	}
  1.2094 +	m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v);
  1.2095 +	n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
  1.2096 +	o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
  1.2097 +	p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
  1.2098 +
  1.2099 +	s = m | n | o | p;
  1.2100 +
  1.2101 +	*(dest + i) = s;
  1.2102 +    }
  1.2103 +}
  1.2104 +
  1.2105 +static void
  1.2106 +combine_disjoint_over_ca (pixman_implementation_t *imp,
  1.2107 +                          pixman_op_t              op,
  1.2108 +                          uint32_t *                dest,
  1.2109 +                          const uint32_t *          src,
  1.2110 +                          const uint32_t *          mask,
  1.2111 +                          int                      width)
  1.2112 +{
  1.2113 +    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
  1.2114 +}
  1.2115 +
  1.2116 +static void
  1.2117 +combine_disjoint_in_ca (pixman_implementation_t *imp,
  1.2118 +                        pixman_op_t              op,
  1.2119 +                        uint32_t *                dest,
  1.2120 +                        const uint32_t *          src,
  1.2121 +                        const uint32_t *          mask,
  1.2122 +                        int                      width)
  1.2123 +{
  1.2124 +    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
  1.2125 +}
  1.2126 +
  1.2127 +static void
  1.2128 +combine_disjoint_in_reverse_ca (pixman_implementation_t *imp,
  1.2129 +                                pixman_op_t              op,
  1.2130 +                                uint32_t *                dest,
  1.2131 +                                const uint32_t *          src,
  1.2132 +                                const uint32_t *          mask,
  1.2133 +                                int                      width)
  1.2134 +{
  1.2135 +    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
  1.2136 +}
  1.2137 +
  1.2138 +static void
  1.2139 +combine_disjoint_out_ca (pixman_implementation_t *imp,
  1.2140 +                         pixman_op_t              op,
  1.2141 +                         uint32_t *                dest,
  1.2142 +                         const uint32_t *          src,
  1.2143 +                         const uint32_t *          mask,
  1.2144 +                         int                      width)
  1.2145 +{
  1.2146 +    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
  1.2147 +}
  1.2148 +
  1.2149 +static void
  1.2150 +combine_disjoint_out_reverse_ca (pixman_implementation_t *imp,
  1.2151 +                                 pixman_op_t              op,
  1.2152 +                                 uint32_t *                dest,
  1.2153 +                                 const uint32_t *          src,
  1.2154 +                                 const uint32_t *          mask,
  1.2155 +                                 int                      width)
  1.2156 +{
  1.2157 +    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
  1.2158 +}
  1.2159 +
  1.2160 +static void
  1.2161 +combine_disjoint_atop_ca (pixman_implementation_t *imp,
  1.2162 +                          pixman_op_t              op,
  1.2163 +                          uint32_t *                dest,
  1.2164 +                          const uint32_t *          src,
  1.2165 +                          const uint32_t *          mask,
  1.2166 +                          int                      width)
  1.2167 +{
  1.2168 +    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
  1.2169 +}
  1.2170 +
  1.2171 +static void
  1.2172 +combine_disjoint_atop_reverse_ca (pixman_implementation_t *imp,
  1.2173 +                                  pixman_op_t              op,
  1.2174 +                                  uint32_t *                dest,
  1.2175 +                                  const uint32_t *          src,
  1.2176 +                                  const uint32_t *          mask,
  1.2177 +                                  int                      width)
  1.2178 +{
  1.2179 +    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
  1.2180 +}
  1.2181 +
  1.2182 +static void
  1.2183 +combine_disjoint_xor_ca (pixman_implementation_t *imp,
  1.2184 +                         pixman_op_t              op,
  1.2185 +                         uint32_t *                dest,
  1.2186 +                         const uint32_t *          src,
  1.2187 +                         const uint32_t *          mask,
  1.2188 +                         int                      width)
  1.2189 +{
  1.2190 +    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
  1.2191 +}
  1.2192 +
  1.2193 +static void
  1.2194 +combine_conjoint_general_ca (uint32_t *      dest,
  1.2195 +                             const uint32_t *src,
  1.2196 +                             const uint32_t *mask,
  1.2197 +                             int            width,
  1.2198 +                             uint8_t        combine)
  1.2199 +{
  1.2200 +    int i;
  1.2201 +
  1.2202 +    for (i = 0; i < width; ++i)
  1.2203 +    {
  1.2204 +	uint32_t s, d;
  1.2205 +	uint32_t m, n, o, p;
  1.2206 +	uint32_t Fa, Fb;
  1.2207 +	uint16_t t, u, v;
  1.2208 +	uint32_t sa;
  1.2209 +	uint8_t da;
  1.2210 +
  1.2211 +	s = *(src + i);
  1.2212 +	m = *(mask + i);
  1.2213 +	d = *(dest + i);
  1.2214 +	da = d >> A_SHIFT;
  1.2215 +
  1.2216 +	combine_mask_ca (&s, &m);
  1.2217 +
  1.2218 +	sa = m;
  1.2219 +
  1.2220 +	switch (combine & COMBINE_A)
  1.2221 +	{
  1.2222 +	default:
  1.2223 +	    Fa = 0;
  1.2224 +	    break;
  1.2225 +
  1.2226 +	case COMBINE_A_OUT:
  1.2227 +	    m = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> 0), da);
  1.2228 +	    n = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
  1.2229 +	    o = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
  1.2230 +	    p = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
  1.2231 +	    Fa = m | n | o | p;
  1.2232 +	    break;
  1.2233 +
  1.2234 +	case COMBINE_A_IN:
  1.2235 +	    m = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> 0), da);
  1.2236 +	    n = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
  1.2237 +	    o = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
  1.2238 +	    p = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
  1.2239 +	    Fa = m | n | o | p;
  1.2240 +	    break;
  1.2241 +
  1.2242 +	case COMBINE_A:
  1.2243 +	    Fa = ~0;
  1.2244 +	    break;
  1.2245 +	}
  1.2246 +
  1.2247 +	switch (combine & COMBINE_B)
  1.2248 +	{
  1.2249 +	default:
  1.2250 +	    Fb = 0;
  1.2251 +	    break;
  1.2252 +
  1.2253 +	case COMBINE_B_OUT:
  1.2254 +	    m = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> 0));
  1.2255 +	    n = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
  1.2256 +	    o = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
  1.2257 +	    p = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
  1.2258 +	    Fb = m | n | o | p;
  1.2259 +	    break;
  1.2260 +
  1.2261 +	case COMBINE_B_IN:
  1.2262 +	    m = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> 0));
  1.2263 +	    n = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
  1.2264 +	    o = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
  1.2265 +	    p = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
  1.2266 +	    Fb = m | n | o | p;
  1.2267 +	    break;
  1.2268 +
  1.2269 +	case COMBINE_B:
  1.2270 +	    Fb = ~0;
  1.2271 +	    break;
  1.2272 +	}
  1.2273 +	m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v);
  1.2274 +	n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
  1.2275 +	o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
  1.2276 +	p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
  1.2277 +
  1.2278 +	s = m | n | o | p;
  1.2279 +
  1.2280 +	*(dest + i) = s;
  1.2281 +    }
  1.2282 +}
  1.2283 +
  1.2284 +static void
  1.2285 +combine_conjoint_over_ca (pixman_implementation_t *imp,
  1.2286 +                          pixman_op_t              op,
  1.2287 +                          uint32_t *                dest,
  1.2288 +                          const uint32_t *          src,
  1.2289 +                          const uint32_t *          mask,
  1.2290 +                          int                      width)
  1.2291 +{
  1.2292 +    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
  1.2293 +}
  1.2294 +
  1.2295 +static void
  1.2296 +combine_conjoint_over_reverse_ca (pixman_implementation_t *imp,
  1.2297 +                                  pixman_op_t              op,
  1.2298 +                                  uint32_t *                dest,
  1.2299 +                                  const uint32_t *          src,
  1.2300 +                                  const uint32_t *          mask,
  1.2301 +                                  int                      width)
  1.2302 +{
  1.2303 +    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OVER);
  1.2304 +}
  1.2305 +
  1.2306 +static void
  1.2307 +combine_conjoint_in_ca (pixman_implementation_t *imp,
  1.2308 +                        pixman_op_t              op,
  1.2309 +                        uint32_t *                dest,
  1.2310 +                        const uint32_t *          src,
  1.2311 +                        const uint32_t *          mask,
  1.2312 +                        int                      width)
  1.2313 +{
  1.2314 +    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
  1.2315 +}
  1.2316 +
  1.2317 +static void
  1.2318 +combine_conjoint_in_reverse_ca (pixman_implementation_t *imp,
  1.2319 +                                pixman_op_t              op,
  1.2320 +                                uint32_t *                dest,
  1.2321 +                                const uint32_t *          src,
  1.2322 +                                const uint32_t *          mask,
  1.2323 +                                int                      width)
  1.2324 +{
  1.2325 +    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
  1.2326 +}
  1.2327 +
  1.2328 +static void
  1.2329 +combine_conjoint_out_ca (pixman_implementation_t *imp,
  1.2330 +                         pixman_op_t              op,
  1.2331 +                         uint32_t *                dest,
  1.2332 +                         const uint32_t *          src,
  1.2333 +                         const uint32_t *          mask,
  1.2334 +                         int                      width)
  1.2335 +{
  1.2336 +    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
  1.2337 +}
  1.2338 +
  1.2339 +static void
  1.2340 +combine_conjoint_out_reverse_ca (pixman_implementation_t *imp,
  1.2341 +                                 pixman_op_t              op,
  1.2342 +                                 uint32_t *                dest,
  1.2343 +                                 const uint32_t *          src,
  1.2344 +                                 const uint32_t *          mask,
  1.2345 +                                 int                      width)
  1.2346 +{
  1.2347 +    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
  1.2348 +}
  1.2349 +
  1.2350 +static void
  1.2351 +combine_conjoint_atop_ca (pixman_implementation_t *imp,
  1.2352 +                          pixman_op_t              op,
  1.2353 +                          uint32_t *                dest,
  1.2354 +                          const uint32_t *          src,
  1.2355 +                          const uint32_t *          mask,
  1.2356 +                          int                      width)
  1.2357 +{
  1.2358 +    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
  1.2359 +}
  1.2360 +
  1.2361 +static void
  1.2362 +combine_conjoint_atop_reverse_ca (pixman_implementation_t *imp,
  1.2363 +                                  pixman_op_t              op,
  1.2364 +                                  uint32_t *                dest,
  1.2365 +                                  const uint32_t *          src,
  1.2366 +                                  const uint32_t *          mask,
  1.2367 +                                  int                      width)
  1.2368 +{
  1.2369 +    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
  1.2370 +}
  1.2371 +
  1.2372 +static void
  1.2373 +combine_conjoint_xor_ca (pixman_implementation_t *imp,
  1.2374 +                         pixman_op_t              op,
  1.2375 +                         uint32_t *                dest,
  1.2376 +                         const uint32_t *          src,
  1.2377 +                         const uint32_t *          mask,
  1.2378 +                         int                      width)
  1.2379 +{
  1.2380 +    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
  1.2381 +}
  1.2382 +
  1.2383 +void
  1.2384 +_pixman_setup_combiner_functions_32 (pixman_implementation_t *imp)
  1.2385 +{
  1.2386 +    /* Unified alpha */
  1.2387 +    imp->combine_32[PIXMAN_OP_CLEAR] = combine_clear;
  1.2388 +    imp->combine_32[PIXMAN_OP_SRC] = combine_src_u;
  1.2389 +    imp->combine_32[PIXMAN_OP_DST] = combine_dst;
  1.2390 +    imp->combine_32[PIXMAN_OP_OVER] = combine_over_u;
  1.2391 +    imp->combine_32[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u;
  1.2392 +    imp->combine_32[PIXMAN_OP_IN] = combine_in_u;
  1.2393 +    imp->combine_32[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u;
  1.2394 +    imp->combine_32[PIXMAN_OP_OUT] = combine_out_u;
  1.2395 +    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u;
  1.2396 +    imp->combine_32[PIXMAN_OP_ATOP] = combine_atop_u;
  1.2397 +    imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u;
  1.2398 +    imp->combine_32[PIXMAN_OP_XOR] = combine_xor_u;
  1.2399 +    imp->combine_32[PIXMAN_OP_ADD] = combine_add_u;
  1.2400 +    imp->combine_32[PIXMAN_OP_SATURATE] = combine_saturate_u;
  1.2401 +
  1.2402 +    /* Disjoint, unified */
  1.2403 +    imp->combine_32[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear;
  1.2404 +    imp->combine_32[PIXMAN_OP_DISJOINT_SRC] = combine_src_u;
  1.2405 +    imp->combine_32[PIXMAN_OP_DISJOINT_DST] = combine_dst;
  1.2406 +    imp->combine_32[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u;
  1.2407 +    imp->combine_32[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_u;
  1.2408 +    imp->combine_32[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u;
  1.2409 +    imp->combine_32[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u;
  1.2410 +    imp->combine_32[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u;
  1.2411 +    imp->combine_32[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u;
  1.2412 +    imp->combine_32[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u;
  1.2413 +    imp->combine_32[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u;
  1.2414 +    imp->combine_32[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u;
  1.2415 +
  1.2416 +    /* Conjoint, unified */
  1.2417 +    imp->combine_32[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear;
  1.2418 +    imp->combine_32[PIXMAN_OP_CONJOINT_SRC] = combine_src_u;
  1.2419 +    imp->combine_32[PIXMAN_OP_CONJOINT_DST] = combine_dst;
  1.2420 +    imp->combine_32[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u;
  1.2421 +    imp->combine_32[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u;
  1.2422 +    imp->combine_32[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u;
  1.2423 +    imp->combine_32[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u;
  1.2424 +    imp->combine_32[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u;
  1.2425 +    imp->combine_32[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u;
  1.2426 +    imp->combine_32[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u;
  1.2427 +    imp->combine_32[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u;
  1.2428 +    imp->combine_32[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u;
  1.2429 +
  1.2430 +    imp->combine_32[PIXMAN_OP_MULTIPLY] = combine_multiply_u;
  1.2431 +    imp->combine_32[PIXMAN_OP_SCREEN] = combine_screen_u;
  1.2432 +    imp->combine_32[PIXMAN_OP_OVERLAY] = combine_overlay_u;
  1.2433 +    imp->combine_32[PIXMAN_OP_DARKEN] = combine_darken_u;
  1.2434 +    imp->combine_32[PIXMAN_OP_LIGHTEN] = combine_lighten_u;
  1.2435 +    imp->combine_32[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u;
  1.2436 +    imp->combine_32[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u;
  1.2437 +    imp->combine_32[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u;
  1.2438 +    imp->combine_32[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u;
  1.2439 +    imp->combine_32[PIXMAN_OP_DIFFERENCE] = combine_difference_u;
  1.2440 +    imp->combine_32[PIXMAN_OP_EXCLUSION] = combine_exclusion_u;
  1.2441 +    imp->combine_32[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u;
  1.2442 +    imp->combine_32[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u;
  1.2443 +    imp->combine_32[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u;
  1.2444 +    imp->combine_32[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u;
  1.2445 +
  1.2446 +    /* Component alpha combiners */
  1.2447 +    imp->combine_32_ca[PIXMAN_OP_CLEAR] = combine_clear_ca;
  1.2448 +    imp->combine_32_ca[PIXMAN_OP_SRC] = combine_src_ca;
  1.2449 +    /* dest */
  1.2450 +    imp->combine_32_ca[PIXMAN_OP_OVER] = combine_over_ca;
  1.2451 +    imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca;
  1.2452 +    imp->combine_32_ca[PIXMAN_OP_IN] = combine_in_ca;
  1.2453 +    imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca;
  1.2454 +    imp->combine_32_ca[PIXMAN_OP_OUT] = combine_out_ca;
  1.2455 +    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca;
  1.2456 +    imp->combine_32_ca[PIXMAN_OP_ATOP] = combine_atop_ca;
  1.2457 +    imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca;
  1.2458 +    imp->combine_32_ca[PIXMAN_OP_XOR] = combine_xor_ca;
  1.2459 +    imp->combine_32_ca[PIXMAN_OP_ADD] = combine_add_ca;
  1.2460 +    imp->combine_32_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca;
  1.2461 +
  1.2462 +    /* Disjoint CA */
  1.2463 +    imp->combine_32_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear_ca;
  1.2464 +    imp->combine_32_ca[PIXMAN_OP_DISJOINT_SRC] = combine_src_ca;
  1.2465 +    imp->combine_32_ca[PIXMAN_OP_DISJOINT_DST] = combine_dst;
  1.2466 +    imp->combine_32_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca;
  1.2467 +    imp->combine_32_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_ca;
  1.2468 +    imp->combine_32_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca;
  1.2469 +    imp->combine_32_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca;
  1.2470 +    imp->combine_32_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca;
  1.2471 +    imp->combine_32_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca;
  1.2472 +    imp->combine_32_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca;
  1.2473 +    imp->combine_32_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca;
  1.2474 +    imp->combine_32_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca;
  1.2475 +
  1.2476 +    /* Conjoint CA */
  1.2477 +    imp->combine_32_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear_ca;
  1.2478 +    imp->combine_32_ca[PIXMAN_OP_CONJOINT_SRC] = combine_src_ca;
  1.2479 +    imp->combine_32_ca[PIXMAN_OP_CONJOINT_DST] = combine_dst;
  1.2480 +    imp->combine_32_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca;
  1.2481 +    imp->combine_32_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca;
  1.2482 +    imp->combine_32_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca;
  1.2483 +    imp->combine_32_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca;
  1.2484 +    imp->combine_32_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca;
  1.2485 +    imp->combine_32_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca;
  1.2486 +    imp->combine_32_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca;
  1.2487 +    imp->combine_32_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca;
  1.2488 +    imp->combine_32_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca;
  1.2489 +
  1.2490 +    imp->combine_32_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca;
  1.2491 +    imp->combine_32_ca[PIXMAN_OP_SCREEN] = combine_screen_ca;
  1.2492 +    imp->combine_32_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca;
  1.2493 +    imp->combine_32_ca[PIXMAN_OP_DARKEN] = combine_darken_ca;
  1.2494 +    imp->combine_32_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca;
  1.2495 +    imp->combine_32_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca;
  1.2496 +    imp->combine_32_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca;
  1.2497 +    imp->combine_32_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca;
  1.2498 +    imp->combine_32_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca;
  1.2499 +    imp->combine_32_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca;
  1.2500 +    imp->combine_32_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca;
  1.2501 +
  1.2502 +    /* It is not clear that these make sense, so make them noops for now */
  1.2503 +    imp->combine_32_ca[PIXMAN_OP_HSL_HUE] = combine_dst;
  1.2504 +    imp->combine_32_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst;
  1.2505 +    imp->combine_32_ca[PIXMAN_OP_HSL_COLOR] = combine_dst;
  1.2506 +    imp->combine_32_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst;
  1.2507 +}

mercurial