gfx/cairo/libpixman/src/pixman-combine64.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/cairo/libpixman/src/pixman-combine64.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,2465 @@
     1.4 +/* WARNING: This file is generated by combine.pl from combine.inc.
     1.5 +   Please edit one of those files rather than this one. */
     1.6 +
     1.7 +#line 1 "pixman-combine.c.template"
     1.8 +#ifdef HAVE_CONFIG_H
     1.9 +#include <config.h>
    1.10 +#endif
    1.11 +
    1.12 +#include <math.h>
    1.13 +#include <string.h>
    1.14 +
    1.15 +#include "pixman-private.h"
    1.16 +
    1.17 +#include "pixman-combine64.h"
    1.18 +
    1.19 +/*** per channel helper functions ***/
     1.20 +/* Component-alpha mask step: *src becomes (presumably) src x mask per channel, *mask becomes mask x source alpha. */
     1.21 +static void
     1.22 +combine_mask_ca (uint64_t *src, uint64_t *mask)
     1.23 +{
     1.24 +    uint64_t a = *mask;
     1.25 +
     1.26 +    uint64_t x;
     1.27 +    uint32_t xa;
     1.28 +
     1.29 +    if (!a)	/* zero mask: the masked source is zero; *mask stays zero */
     1.30 +    {
     1.31 +	*(src) = 0;
     1.32 +	return;
     1.33 +    }
     1.34 +
     1.35 +    x = *(src);
     1.36 +    if (a == ~0)	/* every mask bit set: *src is left as it is */
     1.37 +    {
     1.38 +	x = x >> A_SHIFT;	/* keep only the source alpha */
     1.39 +	x |= x << G_SHIFT;	/* copy it upward ... */
     1.40 +	x |= x << R_SHIFT;	/* ... presumably filling every channel slot (shift values are defined elsewhere) */
     1.41 +	*(mask) = x;
     1.42 +	return;
     1.43 +    }
     1.44 +
     1.45 +    xa = x >> A_SHIFT;	/* source alpha, taken before x is modified below */
     1.46 +    UN16x4_MUL_UN16x4 (x, a);	/* presumably x *= a per channel, in place (macro defined elsewhere) */
     1.47 +    *(src) = x;
     1.48 +    
     1.49 +    UN16x4_MUL_UN16 (a, xa);	/* presumably every channel of a *= xa, in place */
     1.50 +    *(mask) = a;
     1.51 +}
     1.52 +/* Scale *src by *mask (presumably per channel); the mask is only read, never written. */
     1.53 +static void
     1.54 +combine_mask_value_ca (uint64_t *src, const uint64_t *mask)
     1.55 +{
     1.56 +    uint64_t a = *mask;
     1.57 +    uint64_t x;
     1.58 +
     1.59 +    if (!a)	/* zero mask: the masked source is zero */
     1.60 +    {
     1.61 +	*(src) = 0;
     1.62 +	return;
     1.63 +    }
     1.64 +
     1.65 +    if (a == ~0)	/* every mask bit set: *src stays as it is */
     1.66 +	return;
     1.67 +
     1.68 +    x = *(src);
     1.69 +    UN16x4_MUL_UN16x4 (x, a);	/* presumably x *= a per channel, in place (macro defined elsewhere) */
     1.70 +    *(src) = x;
     1.71 +}
     1.72 +/* Rewrite *mask using the alpha of *src (presumably mask x source alpha per channel); *src is only read. */
     1.73 +static void
     1.74 +combine_mask_alpha_ca (const uint64_t *src, uint64_t *mask)
     1.75 +{
     1.76 +    uint64_t a = *(mask);
     1.77 +    uint64_t x;
     1.78 +
     1.79 +    if (!a)	/* a zero mask stays zero */
     1.80 +	return;
     1.81 +
     1.82 +    x = *(src) >> A_SHIFT;	/* source alpha */
     1.83 +    if (x == MASK)	/* source alpha equals MASK (presumably fully opaque): mask is left unchanged */
     1.84 +	return;
     1.85 +
     1.86 +    if (a == ~0)	/* every mask bit set: the new mask is the source alpha copied across the channel slots */
     1.87 +    {
     1.88 +	x |= x << G_SHIFT;
     1.89 +	x |= x << R_SHIFT;
     1.90 +	*(mask) = x;
     1.91 +	return;
     1.92 +    }
     1.93 +
     1.94 +    UN16x4_MUL_UN16 (a, x);	/* presumably every channel of a *= x, in place (macro defined elsewhere) */
     1.95 +    *(mask) = a;
     1.96 +}
    1.97 +
    1.98 +/*
    1.99 + * There are two ways of handling alpha -- either as a single unified value or
   1.100 + * a separate value for each component, hence each macro must have two
   1.101 + * versions.  The unified alpha version has a 'U' at the end of the name,
   1.102 + * the component version has a 'C'.  Similarly, functions which deal with
   1.103 + * this difference will have two versions using the same convention.
   1.104 + */
   1.105 +
   1.106 +/*
   1.107 + * All of the composing functions
   1.108 + */
   1.109 +/* Return src[i], scaled by the alpha of mask[i] when mask is non-NULL (0 when that alpha is 0). */
   1.110 +static force_inline uint64_t
   1.111 +combine_mask (const uint64_t *src, const uint64_t *mask, int i)
   1.112 +{
   1.113 +    uint64_t s, m;
   1.114 +
   1.115 +    if (mask)
   1.116 +    {
   1.117 +	m = *(mask + i) >> A_SHIFT;	/* only the mask's alpha is used */
   1.118 +
   1.119 +	if (!m)
   1.120 +	    return 0;	/* src[i] is not read at all in this case */
   1.121 +    }
   1.122 +
   1.123 +    s = *(src + i);
   1.124 +
   1.125 +    if (mask)	/* m was assigned above under this same condition */
   1.126 +	UN16x4_MUL_UN16 (s, m);
   1.127 +
   1.128 +    return s;
   1.129 +}
   1.130 +/* CLEAR: zero the first `width' 64-bit pixels of dest; imp, op, src and mask are not used. */
   1.131 +static void
   1.132 +combine_clear (pixman_implementation_t *imp,
   1.133 +               pixman_op_t              op,
   1.134 +               uint64_t *                dest,
   1.135 +               const uint64_t *          src,
   1.136 +               const uint64_t *          mask,
   1.137 +               int                      width)
   1.138 +{
   1.139 +    memset (dest, 0, width * sizeof(uint64_t));
   1.140 +}
   1.141 +/* DST: dest is left exactly as it is; none of the arguments are used. */
   1.142 +static void
   1.143 +combine_dst (pixman_implementation_t *imp,
   1.144 +	     pixman_op_t	      op,
   1.145 +	     uint64_t *		      dest,
   1.146 +	     const uint64_t *	      src,
   1.147 +	     const uint64_t *          mask,
   1.148 +	     int		      width)
   1.149 +{
   1.150 +    return;
   1.151 +}
   1.152 +/* SRC, unified alpha: dest[i] = combine_mask (src, mask, i); a plain memcpy when mask is NULL. */
   1.153 +static void
   1.154 +combine_src_u (pixman_implementation_t *imp,
   1.155 +               pixman_op_t              op,
   1.156 +               uint64_t *                dest,
   1.157 +               const uint64_t *          src,
   1.158 +               const uint64_t *          mask,
   1.159 +               int                      width)
   1.160 +{
   1.161 +    int i;
   1.162 +
   1.163 +    if (!mask)
   1.164 +	memcpy (dest, src, width * sizeof (uint64_t));	/* no mask: straight copy of width pixels */
   1.165 +    else
   1.166 +    {
   1.167 +	for (i = 0; i < width; ++i)
   1.168 +	{
   1.169 +	    uint64_t s = combine_mask (src, mask, i);
   1.170 +
   1.171 +	    *(dest + i) = s;
   1.172 +	}
   1.173 +    }
   1.174 +}
   1.175 +
   1.176 +/* OVER, unified alpha. if the Src is opaque, call combine_src_u */
   1.177 +static void
   1.178 +combine_over_u (pixman_implementation_t *imp,
   1.179 +                pixman_op_t              op,
   1.180 +                uint64_t *                dest,
   1.181 +                const uint64_t *          src,
   1.182 +                const uint64_t *          mask,
   1.183 +                int                      width)
   1.184 +{
   1.185 +    int i;
   1.186 +
   1.187 +    for (i = 0; i < width; ++i)
   1.188 +    {
   1.189 +	uint64_t s = combine_mask (src, mask, i);
   1.190 +	uint64_t d = *(dest + i);
   1.191 +	uint64_t ia = ALPHA_16 (~s);	/* alpha channel of the inverted source, i.e. inverse source alpha */
   1.192 +
   1.193 +	UN16x4_MUL_UN16_ADD_UN16x4 (d, ia, s);	/* presumably d = d * ia + s per channel, in place (macro defined elsewhere) */
   1.194 +	*(dest + i) = d;
   1.195 +    }
   1.196 +}
   1.197 +
   1.198 +/* OVER_REVERSE, unified alpha. if the Dst is opaque, this is a noop */
   1.199 +static void
   1.200 +combine_over_reverse_u (pixman_implementation_t *imp,
   1.201 +                        pixman_op_t              op,
   1.202 +                        uint64_t *                dest,
   1.203 +                        const uint64_t *          src,
   1.204 +                        const uint64_t *          mask,
   1.205 +                        int                      width)
   1.206 +{
   1.207 +    int i;
   1.208 +
   1.209 +    for (i = 0; i < width; ++i)
   1.210 +    {
   1.211 +	uint64_t s = combine_mask (src, mask, i);
   1.212 +	uint64_t d = *(dest + i);
   1.213 +	uint64_t ia = ALPHA_16 (~*(dest + i));	/* inverse dest alpha; reads the same pixel as d */
   1.214 +	UN16x4_MUL_UN16_ADD_UN16x4 (s, ia, d);	/* presumably s = s * ia + d per channel, in place */
   1.215 +	*(dest + i) = s;
   1.216 +    }
   1.217 +}
   1.218 +
   1.219 +/* IN, unified alpha. if the Dst is opaque, call combine_src_u */
   1.220 +static void
   1.221 +combine_in_u (pixman_implementation_t *imp,
   1.222 +              pixman_op_t              op,
   1.223 +              uint64_t *                dest,
   1.224 +              const uint64_t *          src,
   1.225 +              const uint64_t *          mask,
   1.226 +              int                      width)
   1.227 +{
   1.228 +    int i;
   1.229 +
   1.230 +    for (i = 0; i < width; ++i)
   1.231 +    {
   1.232 +	uint64_t s = combine_mask (src, mask, i);
   1.233 +	uint64_t a = ALPHA_16 (*(dest + i));	/* dest alpha */
   1.234 +	UN16x4_MUL_UN16 (s, a);	/* presumably every channel of s *= a, in place */
   1.235 +	*(dest + i) = s;
   1.236 +    }
   1.237 +}
   1.238 +
   1.239 +/* IN_REVERSE, unified alpha. if the Src is opaque, this is a noop */
   1.240 +static void
   1.241 +combine_in_reverse_u (pixman_implementation_t *imp,
   1.242 +                      pixman_op_t              op,
   1.243 +                      uint64_t *                dest,
   1.244 +                      const uint64_t *          src,
   1.245 +                      const uint64_t *          mask,
   1.246 +                      int                      width)
   1.247 +{
   1.248 +    int i;
   1.249 +
   1.250 +    for (i = 0; i < width; ++i)
   1.251 +    {
   1.252 +	uint64_t s = combine_mask (src, mask, i);
   1.253 +	uint64_t d = *(dest + i);
   1.254 +	uint64_t a = ALPHA_16 (s);	/* alpha of the masked source */
   1.255 +	UN16x4_MUL_UN16 (d, a);	/* presumably every channel of d *= a, in place */
   1.256 +	*(dest + i) = d;
   1.257 +    }
   1.258 +}
   1.259 +
   1.260 +/* OUT, unified alpha. if the Dst is opaque, call combine_clear */
   1.261 +static void
   1.262 +combine_out_u (pixman_implementation_t *imp,
   1.263 +               pixman_op_t              op,
   1.264 +               uint64_t *                dest,
   1.265 +               const uint64_t *          src,
   1.266 +               const uint64_t *          mask,
   1.267 +               int                      width)
   1.268 +{
   1.269 +    int i;
   1.270 +
   1.271 +    for (i = 0; i < width; ++i)
   1.272 +    {
   1.273 +	uint64_t s = combine_mask (src, mask, i);
   1.274 +	uint64_t a = ALPHA_16 (~*(dest + i));	/* inverse dest alpha */
   1.275 +	UN16x4_MUL_UN16 (s, a);	/* presumably every channel of s *= a, in place */
   1.276 +	*(dest + i) = s;
   1.277 +    }
   1.278 +}
   1.279 +
   1.280 +/* OUT_REVERSE, unified alpha. if the Src is opaque, call combine_clear */
   1.281 +static void
   1.282 +combine_out_reverse_u (pixman_implementation_t *imp,
   1.283 +                       pixman_op_t              op,
   1.284 +                       uint64_t *                dest,
   1.285 +                       const uint64_t *          src,
   1.286 +                       const uint64_t *          mask,
   1.287 +                       int                      width)
   1.288 +{
   1.289 +    int i;
   1.290 +
   1.291 +    for (i = 0; i < width; ++i)
   1.292 +    {
   1.293 +	uint64_t s = combine_mask (src, mask, i);
   1.294 +	uint64_t d = *(dest + i);
   1.295 +	uint64_t a = ALPHA_16 (~s);	/* inverse alpha of the masked source */
   1.296 +	UN16x4_MUL_UN16 (d, a);	/* presumably every channel of d *= a, in place */
   1.297 +	*(dest + i) = d;
   1.298 +    }
   1.299 +}
   1.300 +
   1.301 +/* ATOP, unified alpha. if the Src is opaque, call combine_in_u */
   1.302 +/* if the Dst is opaque, call combine_over_u */
   1.303 +/* if both the Src and Dst are opaque, call combine_src_u */
   1.304 +static void
   1.305 +combine_atop_u (pixman_implementation_t *imp,
   1.306 +                pixman_op_t              op,
   1.307 +                uint64_t *                dest,
   1.308 +                const uint64_t *          src,
   1.309 +                const uint64_t *          mask,
   1.310 +                int                      width)
   1.311 +{
   1.312 +    int i;
   1.313 +
   1.314 +    for (i = 0; i < width; ++i)
   1.315 +    {
   1.316 +	uint64_t s = combine_mask (src, mask, i);
   1.317 +	uint64_t d = *(dest + i);
   1.318 +	uint64_t dest_a = ALPHA_16 (d);
   1.319 +	uint64_t src_ia = ALPHA_16 (~s);	/* inverse alpha of the masked source */
   1.320 +
   1.321 +	UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (s, dest_a, d, src_ia);	/* presumably s = s * dest_a + d * src_ia, in place */
   1.322 +	*(dest + i) = s;
   1.323 +    }
   1.324 +}
   1.325 +
   1.326 +/* ATOP_REVERSE, unified alpha. if the Src is opaque, call combine_over_reverse_u */
   1.327 +/* if the Dst is opaque, call combine_in_reverse_u */
   1.328 +/* if both the Src and Dst are opaque, call combine_dst */
   1.329 +static void
   1.330 +combine_atop_reverse_u (pixman_implementation_t *imp,
   1.331 +                        pixman_op_t              op,
   1.332 +                        uint64_t *                dest,
   1.333 +                        const uint64_t *          src,
   1.334 +                        const uint64_t *          mask,
   1.335 +                        int                      width)
   1.336 +{
   1.337 +    int i;
   1.338 +
   1.339 +    for (i = 0; i < width; ++i)
   1.340 +    {
   1.341 +	uint64_t s = combine_mask (src, mask, i);
   1.342 +	uint64_t d = *(dest + i);
   1.343 +	uint64_t src_a = ALPHA_16 (s);
   1.344 +	uint64_t dest_ia = ALPHA_16 (~d);	/* inverse dest alpha */
   1.345 +
   1.346 +	UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (s, dest_ia, d, src_a);	/* presumably s = s * dest_ia + d * src_a, in place */
   1.347 +	*(dest + i) = s;
   1.348 +    }
   1.349 +}
   1.350 +
   1.351 +/* XOR, unified alpha. if the Src is opaque, call combine_over_u */
   1.352 +/* if the Dst is opaque, call combine_over_reverse_u */
   1.353 +/* if both the Src and Dst are opaque, call combine_clear */
   1.354 +static void
   1.355 +combine_xor_u (pixman_implementation_t *imp,
   1.356 +               pixman_op_t              op,
   1.357 +               uint64_t *                dest,
   1.358 +               const uint64_t *          src,
   1.359 +               const uint64_t *          mask,
   1.360 +               int                      width)
   1.361 +{
   1.362 +    int i;
   1.363 +
   1.364 +    for (i = 0; i < width; ++i)
   1.365 +    {
   1.366 +	uint64_t s = combine_mask (src, mask, i);
   1.367 +	uint64_t d = *(dest + i);
   1.368 +	uint64_t src_ia = ALPHA_16 (~s);	/* inverse alpha of the masked source */
   1.369 +	uint64_t dest_ia = ALPHA_16 (~d);	/* inverse dest alpha */
   1.370 +
   1.371 +	UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (s, dest_ia, d, src_ia);	/* presumably s = s * dest_ia + d * src_ia, in place */
   1.372 +	*(dest + i) = s;
   1.373 +    }
   1.374 +}
   1.375 +/* ADD, unified alpha: adds the masked source to dest through UN16x4_ADD_UN16x4 (overflow handling is up to that macro). */
   1.376 +static void
   1.377 +combine_add_u (pixman_implementation_t *imp,
   1.378 +               pixman_op_t              op,
   1.379 +               uint64_t *                dest,
   1.380 +               const uint64_t *          src,
   1.381 +               const uint64_t *          mask,
   1.382 +               int                      width)
   1.383 +{
   1.384 +    int i;
   1.385 +
   1.386 +    for (i = 0; i < width; ++i)
   1.387 +    {
   1.388 +	uint64_t s = combine_mask (src, mask, i);
   1.389 +	uint64_t d = *(dest + i);
   1.390 +	UN16x4_ADD_UN16x4 (d, s);	/* presumably d += s per channel, in place (macro defined elsewhere) */
   1.391 +	*(dest + i) = d;
   1.392 +    }
   1.393 +}
   1.394 +
   1.395 +/* SATURATE, unified alpha: the source is scaled down first when its alpha exceeds the inverse dest alpha. */
   1.396 +/* if the Src is opaque, or the Dst is opaque, call combine_add_u */
   1.397 +/* if both the Src and Dst are opaque, call combine_add_u */
   1.398 +static void
   1.399 +combine_saturate_u (pixman_implementation_t *imp,
   1.400 +                    pixman_op_t              op,
   1.401 +                    uint64_t *                dest,
   1.402 +                    const uint64_t *          src,
   1.403 +                    const uint64_t *          mask,
   1.404 +                    int                      width)
   1.405 +{
   1.406 +    int i;
   1.407 +
   1.408 +    for (i = 0; i < width; ++i)
   1.409 +    {
   1.410 +	uint64_t s = combine_mask (src, mask, i);
   1.411 +	uint64_t d = *(dest + i);
   1.412 +	uint32_t sa, da;
   1.413 +
   1.414 +	sa = s >> A_SHIFT;	/* source alpha */
   1.415 +	da = ~d >> A_SHIFT;	/* inverse dest alpha: what is still free in dest */
   1.416 +	if (sa > da)
   1.417 +	{
   1.418 +	    sa = DIV_UN16 (da, sa);	/* presumably the ratio da / sa as a 16-bit fraction (macro defined elsewhere) */
   1.419 +	    UN16x4_MUL_UN16 (s, sa);	/* presumably every channel of s *= sa, in place */
   1.420 +	}
   1.421 +
   1.422 +	UN16x4_ADD_UN16x4 (d, s);	/* presumably d += s per channel, in place */
   1.423 +	*(dest + i) = d;
   1.424 +    }
   1.425 +}
   1.426 +
   1.427 +/*
   1.428 + * PDF blend modes:
   1.429 + * The following blend modes have been taken from the PDF ISO 32000
   1.430 + * specification, which at this point in time is available from
   1.431 + * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf
   1.432 + * The relevant chapters are 11.3.5 and 11.3.6.
   1.433 + * The formula for computing the final pixel color given in 11.3.6 is:
   1.434 + * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
   1.435 + * with B() being the blend function.
   1.436 + * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs
   1.437 + *
   1.438 + * These blend modes should match the SVG filter draft specification, as
   1.439 + * it has been designed to mirror ISO 32000. Note that at the current point
   1.440 + * no released draft exists that shows this, as the formulas have not been
   1.441 + * updated yet after the release of ISO 32000.
   1.442 + *
   1.443 + * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and
   1.444 + * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an
   1.445 + * argument. Note that this implementation operates on premultiplied colors,
   1.446 + * while the PDF specification does not. Therefore the code uses the formula
   1.447 + * Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as)
   1.448 + */
   1.449 +
   1.450 +/*
   1.451 + * Multiply (combine_multiply_u below is the unified-alpha combiner)
   1.452 + * B(Dca, ad, Sca, as) = Dca.Sca
   1.453 + */
   1.454 +
   1.455 +static void
   1.456 +combine_multiply_u (pixman_implementation_t *imp,
   1.457 +                    pixman_op_t              op,
   1.458 +                    uint64_t *                dest,
   1.459 +                    const uint64_t *          src,
   1.460 +                    const uint64_t *          mask,
   1.461 +                    int                      width)
   1.462 +{
   1.463 +    int i;
   1.464 +
   1.465 +    for (i = 0; i < width; ++i)
   1.466 +    {
   1.467 +	uint64_t s = combine_mask (src, mask, i);
   1.468 +	uint64_t d = *(dest + i);
   1.469 +	uint64_t ss = s;	/* working copy; s itself is still needed for the product below */
   1.470 +	uint64_t src_ia = ALPHA_16 (~s);
   1.471 +	uint64_t dest_ia = ALPHA_16 (~d);
   1.472 +
   1.473 +	UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (ss, dest_ia, d, src_ia);	/* presumably ss = ss * dest_ia + d * src_ia */
   1.474 +	UN16x4_MUL_UN16x4 (d, s);	/* presumably d = d * s per channel: the blend term Dca.Sca */
   1.475 +	UN16x4_ADD_UN16x4 (d, ss);	/* presumably d += ss: sum of all the terms */
   1.476 +
   1.477 +	*(dest + i) = d;
   1.478 +    }
   1.479 +}
   1.480 +/* Multiply, component alpha: like combine_multiply_u but with a per-channel mask (mask must be non-NULL). */
   1.481 +static void
   1.482 +combine_multiply_ca (pixman_implementation_t *imp,
   1.483 +                     pixman_op_t              op,
   1.484 +                     uint64_t *                dest,
   1.485 +                     const uint64_t *          src,
   1.486 +                     const uint64_t *          mask,
   1.487 +                     int                      width)
   1.488 +{
   1.489 +    int i;
   1.490 +
   1.491 +    for (i = 0; i < width; ++i)
   1.492 +    {
   1.493 +	uint64_t m = *(mask + i);
   1.494 +	uint64_t s = *(src + i);
   1.495 +	uint64_t d = *(dest + i);
   1.496 +	uint64_t r = d;
   1.497 +	uint64_t dest_ia = ALPHA_16 (~d);
   1.498 +
   1.499 +	combine_mask_ca (&s, &m);	/* s = src x mask, m = mask x source alpha, so that ~m below is the inverse per-channel source alpha */
   1.500 +
   1.501 +	UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16 (r, ~m, s, dest_ia);	/* presumably r = r * ~m (per channel) + s * dest_ia */
   1.502 +	UN16x4_MUL_UN16x4 (d, s);	/* presumably d = d * s per channel: the blend term Dca.Sca */
   1.503 +	UN16x4_ADD_UN16x4 (r, d);	/* presumably r += d */
   1.504 +
   1.505 +	*(dest + i) = r;
   1.506 +    }
   1.507 +}
   1.508 +/* Generates combine_<name>_u and combine_<name>_ca around a per-channel blend_<name> (dca, da, sca, sa) function. */
   1.509 +#define PDF_SEPARABLE_BLEND_MODE(name)					\
   1.510 +    static void								\
   1.511 +    combine_ ## name ## _u (pixman_implementation_t *imp,		\
   1.512 +			    pixman_op_t              op,		\
   1.513 +                            uint64_t *                dest,		\
   1.514 +			    const uint64_t *          src,		\
   1.515 +			    const uint64_t *          mask,		\
   1.516 +			    int                      width)		\
   1.517 +    {									\
   1.518 +	int i;								\
   1.519 +	for (i = 0; i < width; ++i) {					\
   1.520 +	    uint64_t s = combine_mask (src, mask, i);			\
   1.521 +	    uint64_t d = *(dest + i);					\
   1.522 +	    uint16_t sa = ALPHA_16 (s);					\
   1.523 +	    uint16_t isa = ~sa;						\
   1.524 +	    uint16_t da = ALPHA_16 (d);					\
   1.525 +	    uint16_t ida = ~da;						\
   1.526 +	    uint64_t result;						\
   1.527 +									\
   1.528 +	    result = d;							\
   1.529 +	    UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (result, isa, s, ida);	\
   1.530 +	    								\
   1.531 +	    *(dest + i) = result +					\
   1.532 +		(DIV_ONE_UN16 (sa * (uint64_t)da) << A_SHIFT) +		\
   1.533 +		(blend_ ## name (RED_16 (d), da, RED_16 (s), sa) << R_SHIFT) + \
   1.534 +		(blend_ ## name (GREEN_16 (d), da, GREEN_16 (s), sa) << G_SHIFT) + \
   1.535 +		(blend_ ## name (BLUE_16 (d), da, BLUE_16 (s), sa));	\
   1.536 +	}								\
   1.537 +    }									\
   1.538 +    									\
   1.539 +    static void								\
   1.540 +    combine_ ## name ## _ca (pixman_implementation_t *imp,		\
   1.541 +			     pixman_op_t              op,		\
   1.542 +                             uint64_t *                dest,		\
   1.543 +			     const uint64_t *          src,		\
   1.544 +			     const uint64_t *          mask,		\
   1.545 +			     int                     width)		\
   1.546 +    {									\
   1.547 +	int i;								\
   1.548 +	for (i = 0; i < width; ++i) {					\
   1.549 +	    uint64_t m = *(mask + i);					\
   1.550 +	    uint64_t s = *(src + i);					\
   1.551 +	    uint64_t d = *(dest + i);					\
   1.552 +	    uint16_t da = ALPHA_16 (d);					\
   1.553 +	    uint16_t ida = ~da;						\
   1.554 +	    uint64_t result;						\
   1.555 +            								\
   1.556 +	    combine_mask_ca (&s, &m); /* m = mask x source alpha, used as per-channel source alpha below */ \
   1.557 +            								\
   1.558 +	    result = d;							\
   1.559 +	    UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16 (result, ~m, s, ida);     \
   1.560 +            								\
   1.561 +	    result +=							\
   1.562 +	        (DIV_ONE_UN16 (ALPHA_16 (m) * (uint64_t)da) << A_SHIFT) +	\
   1.563 +	        (blend_ ## name (RED_16 (d), da, RED_16 (s), RED_16 (m)) << R_SHIFT) + \
   1.564 +	        (blend_ ## name (GREEN_16 (d), da, GREEN_16 (s), GREEN_16 (m)) << G_SHIFT) + \
   1.565 +	        (blend_ ## name (BLUE_16 (d), da, BLUE_16 (s), BLUE_16 (m))); \
   1.566 +	    								\
   1.567 +	    *(dest + i) = result;					\
   1.568 +	}								\
   1.569 +    }
   1.570 +
   1.571 +/*
   1.572 + * Screen (dca/sca: dest/source channel value, da/sa: dest/source alpha, as passed by PDF_SEPARABLE_BLEND_MODE)
   1.573 + * B(Dca, ad, Sca, as) = Dca.sa + Sca.da - Dca.Sca
   1.574 + */
   1.575 +static inline uint64_t
   1.576 +blend_screen (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
   1.577 +{
   1.578 +    return DIV_ONE_UN16 (sca * da + dca * sa - sca * dca);	/* DIV_ONE_UN16 presumably rescales the product to one channel (defined elsewhere) */
   1.579 +}
   1.580 +
   1.581 +PDF_SEPARABLE_BLEND_MODE (screen)	/* defines combine_screen_u and combine_screen_ca */
   1.582 +
   1.583 +/*
   1.584 + * Overlay (same two formulas as hard light, but selected on the dest channel)
   1.585 + * B(Dca, Da, Sca, Sa) =
   1.586 + *   if 2.Dca < Da
   1.587 + *     2.Sca.Dca
   1.588 + *   otherwise
   1.589 + *     Sa.Da - 2.(Da - Dca).(Sa - Sca)
   1.590 + */
   1.591 +static inline uint64_t
   1.592 +blend_overlay (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
   1.593 +{
   1.594 +    uint64_t rca;
   1.595 +
   1.596 +    if (2 * dca < da)
   1.597 +	rca = 2 * sca * dca;
   1.598 +    else
   1.599 +	rca = sa * da - 2 * (da - dca) * (sa - sca);	/* unsigned: wraps unless dca <= da and sca <= sa (presumably ensured by premultiplied input) */
   1.600 +    return DIV_ONE_UN16 (rca);
   1.601 +}
   1.602 +
   1.603 +PDF_SEPARABLE_BLEND_MODE (overlay)	/* defines combine_overlay_u and combine_overlay_ca */
   1.604 +
   1.605 +/*
   1.606 + * Darken (the smaller of the two cross products)
   1.607 + * B(Dca, Da, Sca, Sa) = min (Sca.Da, Dca.Sa)
   1.608 + */
   1.609 +static inline uint64_t
   1.610 +blend_darken (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
   1.611 +{
   1.612 +    uint64_t s, d;
   1.613 +
   1.614 +    s = sca * da;
   1.615 +    d = dca * sa;
   1.616 +    return DIV_ONE_UN16 (s > d ? d : s);	/* picks the minimum of s and d */
   1.617 +}
   1.618 +
   1.619 +PDF_SEPARABLE_BLEND_MODE (darken)	/* defines combine_darken_u and combine_darken_ca */
   1.620 +
   1.621 +/*
   1.622 + * Lighten (the larger of the two cross products)
   1.623 + * B(Dca, Da, Sca, Sa) = max (Sca.Da, Dca.Sa)
   1.624 + */
   1.625 +static inline uint64_t
   1.626 +blend_lighten (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
   1.627 +{
   1.628 +    uint64_t s, d;
   1.629 +
   1.630 +    s = sca * da;
   1.631 +    d = dca * sa;
   1.632 +    return DIV_ONE_UN16 (s > d ? s : d);	/* picks the maximum of s and d */
   1.633 +}
   1.634 +
   1.635 +PDF_SEPARABLE_BLEND_MODE (lighten)	/* defines combine_lighten_u and combine_lighten_ca */
   1.636 +
   1.637 +/*
   1.638 + * Color dodge (the code tests Sca >= Sa first, then Dca == 0 inside that branch)
   1.639 + * B(Dca, Da, Sca, Sa) =
   1.640 + *   if Dca == 0
   1.641 + *     0
   1.642 + *   if Sca == Sa
   1.643 + *     Sa.Da
   1.644 + *   otherwise
   1.645 + *     Sa.Da. min (1, Dca / Da / (1 - Sca/Sa))
   1.646 + */
   1.647 +static inline uint64_t
   1.648 +blend_color_dodge (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
   1.649 +{
   1.650 +    if (sca >= sa)	/* also keeps the divisor (sa - sca) below from being zero or wrapping */
   1.651 +    {
   1.652 +	return dca == 0 ? 0 : DIV_ONE_UN16 (sa * da);
   1.653 +    }
   1.654 +    else
   1.655 +    {
   1.656 +	uint64_t rca = dca * sa / (sa - sca);	/* integer division */
   1.657 +	return DIV_ONE_UN16 (sa * MIN (rca, da));	/* clamp rca to da: the min (1, ...) of the formula */
   1.658 +    }
   1.659 +}
   1.660 +
   1.661 +PDF_SEPARABLE_BLEND_MODE (color_dodge)	/* defines combine_color_dodge_u and combine_color_dodge_ca */
   1.662 +
   1.663 +/*
   1.664 + * Color burn (the code tests Sca == 0 first, then compares Dca with Da inside that branch)
   1.665 + * B(Dca, Da, Sca, Sa) =
   1.666 + *   if Dca == Da
   1.667 + *     Sa.Da
   1.668 + *   if Sca == 0
   1.669 + *     0
   1.670 + *   otherwise
   1.671 + *     Sa.Da.(1 - min (1, (1 - Dca/Da).Sa / Sca))
   1.672 + */
   1.673 +static inline uint64_t
   1.674 +blend_color_burn (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
   1.675 +{
   1.676 +    if (sca == 0)	/* also keeps the division by sca below safe */
   1.677 +    {
   1.678 +	return dca < da ? 0 : DIV_ONE_UN16 (sa * da);
   1.679 +    }
   1.680 +    else
   1.681 +    {
   1.682 +	uint64_t rca = (da - dca) * sa / sca;	/* integer division; (da - dca) wraps if dca > da */
   1.683 +	return DIV_ONE_UN16 (sa * (MAX (rca, da) - rca));	/* MAX (rca, da) - rca is da - rca, floored at 0 */
   1.684 +    }
   1.685 +}
   1.686 +
   1.687 +PDF_SEPARABLE_BLEND_MODE (color_burn)	/* defines combine_color_burn_u and combine_color_burn_ca */
   1.688 +
   1.689 +/*
   1.690 + * Hard light (same two formulas as overlay, but selected on the source channel)
   1.691 + * B(Dca, Da, Sca, Sa) =
   1.692 + *   if 2.Sca < Sa
   1.693 + *     2.Sca.Dca
   1.694 + *   otherwise
   1.695 + *     Sa.Da - 2.(Da - Dca).(Sa - Sca)
   1.696 + */
   1.697 +static inline uint64_t
   1.698 +blend_hard_light (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
   1.699 +{
   1.700 +    if (2 * sca < sa)
   1.701 +	return DIV_ONE_UN16 (2 * sca * dca);
   1.702 +    else
   1.703 +	return DIV_ONE_UN16 (sa * da - 2 * (da - dca) * (sa - sca));	/* unsigned: wraps unless dca <= da and sca <= sa */
   1.704 +}
   1.705 +
   1.706 +PDF_SEPARABLE_BLEND_MODE (hard_light)	/* defines combine_hard_light_u and combine_hard_light_ca */
   1.707 +
   1.708 +/*
   1.709 + * Soft light (computed in double precision; formulas below follow the code)
   1.710 + * B(Dca, Da, Sca, Sa) =
   1.711 + *   if (2.Sca < Sa)
   1.712 + *     Dca.(Sa + (1 - Dca/Da).(2.Sca - Sa))
   1.713 + *   otherwise if Dca.4 <= Da
   1.714 + *     Dca.(Sa + (2.Sca - Sa).((16.Dca/Da - 12).Dca/Da + 3))
   1.715 + *   otherwise
   1.716 + *     (Dca.Sa + (SQRT (Dca/Da).Da - Dca).(2.Sca - Sa))
   1.717 + */
   1.718 +static inline uint64_t
   1.719 +blend_soft_light (uint64_t dca_org,
   1.720 +		  uint64_t da_org,
   1.721 +		  uint64_t sca_org,
   1.722 +		  uint64_t sa_org)
   1.723 +{
   1.724 +    double dca = dca_org * (1.0 / MASK);	/* scale by 1/MASK (presumably to the 0..1 range) */
   1.725 +    double da = da_org * (1.0 / MASK);
   1.726 +    double sca = sca_org * (1.0 / MASK);
   1.727 +    double sa = sa_org * (1.0 / MASK);
   1.728 +    double rca;
   1.729 +
   1.730 +    if (2 * sca < sa)
   1.731 +    {
   1.732 +	if (da == 0)	/* avoids the division by da in the else branch */
   1.733 +	    rca = dca * sa;
   1.734 +	else
   1.735 +	    rca = dca * sa - dca * (da - dca) * (sa - 2 * sca) / da;
   1.736 +    }
   1.737 +    else if (da == 0)	/* the remaining branches divide by da as well */
   1.738 +    {
   1.739 +	rca = 0;
   1.740 +    }
   1.741 +    else if (4 * dca <= da)
   1.742 +    {
   1.743 +	rca = dca * sa +
   1.744 +	    (2 * sca - sa) * dca * ((16 * dca / da - 12) * dca / da + 3);
   1.745 +    }
   1.746 +    else
   1.747 +    {
   1.748 +	rca = dca * sa + (sqrt (dca * da) - dca) * (2 * sca - sa);	/* sqrt (dca * da) equals SQRT (Dca/Da).Da */
   1.749 +    }
   1.750 +    return rca * MASK + 0.5;	/* scale back by MASK; + 0.5 rounds to nearest on conversion to uint64_t */
   1.751 +}
   1.752 +
   1.753 +PDF_SEPARABLE_BLEND_MODE (soft_light)	/* defines combine_soft_light_u and combine_soft_light_ca */
   1.754 +
   1.755 +/*
   1.756 + * Difference (absolute difference of the two cross products)
   1.757 + * B(Dca, Da, Sca, Sa) = abs (Dca.Sa - Sca.Da)
   1.758 + */
   1.759 +static inline uint64_t
   1.760 +blend_difference (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
   1.761 +{
   1.762 +    uint64_t dcasa = dca * sa;
   1.763 +    uint64_t scada = sca * da;
   1.764 +
   1.765 +    if (scada < dcasa)	/* subtract the smaller from the larger so the unsigned result cannot wrap */
   1.766 +	return DIV_ONE_UN16 (dcasa - scada);
   1.767 +    else
   1.768 +	return DIV_ONE_UN16 (scada - dcasa);
   1.769 +}
   1.770 +
   1.771 +PDF_SEPARABLE_BLEND_MODE (difference)	/* defines combine_difference_u and combine_difference_ca */
   1.772 +
   1.773 +/*
   1.774 + * Exclusion (like screen, but the product term is subtracted twice)
   1.775 + * B(Dca, Da, Sca, Sa) = (Sca.Da + Dca.Sa - 2.Sca.Dca)
   1.776 + */
   1.777 +
   1.778 +/* This can be made faster by writing it directly and not using
   1.779 + * PDF_SEPARABLE_BLEND_MODE, but that's a performance optimization */
   1.780 +
   1.781 +static inline uint64_t
   1.782 +blend_exclusion (uint64_t dca, uint64_t da, uint64_t sca, uint64_t sa)
   1.783 +{
   1.784 +    return DIV_ONE_UN16 (sca * da + dca * sa - 2 * dca * sca);	/* unsigned: presumably never wraps for premultiplied input (dca <= da, sca <= sa) */
   1.785 +}
   1.786 +
   1.787 +PDF_SEPARABLE_BLEND_MODE (exclusion)	/* defines combine_exclusion_u and combine_exclusion_ca */
   1.788 +
   1.789 +#undef PDF_SEPARABLE_BLEND_MODE	/* last separable mode instantiated above */
   1.790 +
   1.791 +/*
   1.792 + * PDF non-separable blend modes are implemented using the following functions
   1.793 + * to operate in Hsl space, with Cmax, Cmid, Cmin referring to the max, mid
   1.794 + * and min value of the red, green and blue components.
   1.795 + *
   1.796 + * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue
   1.797 + *
   1.798 + * clip_color (C):
   1.799 + *   l = LUM (C)
   1.800 + *   min = Cmin
   1.801 + *   max = Cmax
   1.802 + *   if min < 0.0
   1.803 + *     C = l + ( ( ( C – l ) × l ) ⁄ ( l – min ) )
   1.804 + *   if max > 1.0
   1.805 + *     C = l + ( ( ( C – l ) × ( 1 – l ) ) ⁄ ( max – l ) )
   1.806 + *   return C
   1.807 + *
   1.808 + * set_lum (C, l):
   1.809 + *   d = l – LUM (C)
   1.810 + *   C += d
   1.811 + *   return clip_color (C)
   1.812 + *
   1.813 + * SAT (C) = CH_MAX (C) - CH_MIN (C)
   1.814 + *
   1.815 + * set_sat (C, s):
   1.816 + *  if Cmax > Cmin
   1.817 + *    Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) )
   1.818 + *    Cmax = s
   1.819 + *  else
   1.820 + *    Cmid = Cmax = 0.0
   1.821 + *  Cmin = 0.0
   1.822 + *  return C
   1.823 + */
   1.824 +
   1.825 +/* For premultiplied colors, we need to know what happens when C is
   1.826 + * multiplied by a real number. LUM and SAT are linear:
   1.827 + *
   1.828 + *    LUM (r × C) = r × LUM (C)		SAT (r * C) = r * SAT (C)
   1.829 + *
   1.830 + * If we extend clip_color with an extra argument a and change
   1.831 + *
   1.832 + *        if max > 1.0
   1.833 + *
   1.834 + * into
   1.835 + *
   1.836 + *        if max > a
   1.837 + *
   1.838 + * then clip_color is also linear:
   1.839 + *
   1.840 + *    r * clip_color (C, a) = clip_color (r * C, r * a);
   1.841 + *
   1.842 + * for positive r.
   1.843 + *
   1.844 + * Similarly, we can extend set_lum with an extra argument that is just passed
   1.845 + * on to clip_color:
   1.846 + *
   1.847 + *   r * set_lum ( C, l, a)
   1.848 + *
   1.849 + *   = r × clip_color ( C + l - LUM (C), a)
   1.850 + *
   1.851 + *   = clip_color ( r * C + r × l - r * LUM (C), r * a)
   1.852 + *
   1.853 + *   = set_lum ( r * C, r * l, r * a)
   1.854 + *
   1.855 + * Finally, set_sat:
   1.856 + *
   1.857 + *    r * set_sat (C, s) = set_sat (x * C, r * s)
   1.858 + *
   1.859 + * The above holds for all non-zero x, because the x'es in the fraction for
   1.860 + * C_mid cancel out. Specifically, it holds for x = r:
   1.861 + *
   1.862 + *    r * set_sat (C, s) = set_sat (r * C, r * s)
   1.863 + *
   1.864 + */
   1.865 +
   1.866 +/* So, for the non-separable PDF blend modes, we have (using s, d for
   1.867 + * non-premultiplied colors, and S, D for premultiplied):
   1.868 + *
   1.869 + *   Color:
   1.870 + *
   1.871 + *     a_s * a_d * B(s, d)
   1.872 + *   = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1)
   1.873 + *   = set_lum (S * a_d, a_s * LUM (D), a_s * a_d)
   1.874 + *
   1.875 + *
   1.876 + *   Luminosity:
   1.877 + *
   1.878 + *     a_s * a_d * B(s, d)
   1.879 + *   = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1)
   1.880 + *   = set_lum (a_s * D, a_d * LUM(S), a_s * a_d)
   1.881 + *
   1.882 + *
   1.883 + *   Saturation:
   1.884 + *
   1.885 + *     a_s * a_d * B(s, d)
   1.886 + *   = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1)
   1.887 + *   = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)),
   1.888 + *                                        a_s * LUM (D), a_s * a_d)
   1.889 + *   = set_lum (set_sat (a_s * D, a_d * SAT (S)), a_s * LUM (D), a_s * a_d)
   1.890 + *
   1.891 + *   Hue:
   1.892 + *
   1.893 + *     a_s * a_d * B(s, d)
   1.894 + *   = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1)
   1.895 + *   = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d)
   1.896 + *
   1.897 + */
   1.898 +
   1.899 +#define CH_MIN(c) (c[0] < c[1] ? (c[0] < c[2] ? c[0] : c[2]) : (c[1] < c[2] ? c[1] : c[2])) /* smallest of c[0..2] */
   1.900 +#define CH_MAX(c) (c[0] > c[1] ? (c[0] > c[2] ? c[0] : c[2]) : (c[1] > c[2] ? c[1] : c[2])) /* largest of c[0..2] */
   1.901 +#define LUM(c) ((c[0] * 30 + c[1] * 59 + c[2] * 11) / 100) /* weighted sum, weights 30/59/11 percent */
   1.902 +#define SAT(c) (CH_MAX (c) - CH_MIN (c)) /* spread between the largest and smallest of c[0..2] */
   1.903 +/* Generates combine_<name>_u around blend_<name> (c, dc, da, sc, sa), which fills c[] from the dest/source colour triples. */
   1.904 +#define PDF_NON_SEPARABLE_BLEND_MODE(name)				\
   1.905 +    static void								\
   1.906 +    combine_ ## name ## _u (pixman_implementation_t *imp,		\
   1.907 +			    pixman_op_t op,				\
   1.908 +                            uint64_t *dest,				\
   1.909 +			    const uint64_t *src,				\
   1.910 +			    const uint64_t *mask,			\
   1.911 +			    int width)					\
   1.912 +    {									\
   1.913 +	int i;								\
   1.914 +	for (i = 0; i < width; ++i)					\
   1.915 +	{								\
   1.916 +	    uint64_t s = combine_mask (src, mask, i);			\
   1.917 +	    uint64_t d = *(dest + i);					\
   1.918 +	    uint16_t sa = ALPHA_16 (s);					\
   1.919 +	    uint16_t isa = ~sa;						\
   1.920 +	    uint16_t da = ALPHA_16 (d);					\
   1.921 +	    uint16_t ida = ~da;						\
   1.922 +	    uint64_t result;						\
   1.923 +	    uint64_t sc[3], dc[3], c[3];					\
   1.924 +            								\
   1.925 +	    result = d;							\
   1.926 +	    UN16x4_MUL_UN16_ADD_UN16x4_MUL_UN16 (result, isa, s, ida);	\
   1.927 +	    dc[0] = RED_16 (d);						\
   1.928 +	    sc[0] = RED_16 (s);						\
   1.929 +	    dc[1] = GREEN_16 (d);					\
   1.930 +	    sc[1] = GREEN_16 (s);					\
   1.931 +	    dc[2] = BLUE_16 (d);						\
   1.932 +	    sc[2] = BLUE_16 (s);						\
   1.933 +	    blend_ ## name (c, dc, da, sc, sa);				\
   1.934 +            								\
   1.935 +	    *(dest + i) = result +					\
   1.936 +		(DIV_ONE_UN16 (sa * (uint64_t)da) << A_SHIFT) +		\
   1.937 +		(DIV_ONE_UN16 (c[0]) << R_SHIFT) +			\
   1.938 +		(DIV_ONE_UN16 (c[1]) << G_SHIFT) +			\
   1.939 +		(DIV_ONE_UN16 (c[2]));					\
   1.940 +	}								\
   1.941 +    }
   1.942 +/* set_lum: shifts src so that its LUM equals lum, rescales the channels so that none lies below 0 or above sa, and stores the rounded result in dest. sa, lum and src are divided by MASK on entry and the result is multiplied by MASK again on exit. dest and src may be the same array. */
   1.943 +static void
   1.944 +set_lum (uint64_t dest[3], uint64_t src[3], uint64_t sa, uint64_t lum)
   1.945 +{
   1.946 +    double a, l, min, max;
   1.947 +    double tmp[3];
   1.948 +    /* Work in double, with every input scaled down by MASK. */
   1.949 +    a = sa * (1.0 / MASK);
   1.950 +
   1.951 +    l = lum * (1.0 / MASK);
   1.952 +    tmp[0] = src[0] * (1.0 / MASK);
   1.953 +    tmp[1] = src[1] * (1.0 / MASK);
   1.954 +    tmp[2] = src[2] * (1.0 / MASK);
   1.955 +    /* Add the luminosity difference to every channel. */
   1.956 +    l = l - LUM (tmp);
   1.957 +    tmp[0] += l;
   1.958 +    tmp[1] += l;
   1.959 +    tmp[2] += l;
   1.960 +
   1.961 +    /* clip_color: rescale each channel's distance from l so that no channel ends up below 0 or above a */
   1.962 +    l = LUM (tmp);
   1.963 +    min = CH_MIN (tmp);
   1.964 +    max = CH_MAX (tmp);
   1.965 +    /* min and max are taken once, before either adjustment below. */
   1.966 +    if (min < 0)
   1.967 +    {
   1.968 +	if (l - min == 0.0)
   1.969 +	{
   1.970 +	    tmp[0] = 0;
   1.971 +	    tmp[1] = 0;
   1.972 +	    tmp[2] = 0;
   1.973 +	}
   1.974 +	else
   1.975 +	{
   1.976 +	    tmp[0] = l + (tmp[0] - l) * l / (l - min);
   1.977 +	    tmp[1] = l + (tmp[1] - l) * l / (l - min);
   1.978 +	    tmp[2] = l + (tmp[2] - l) * l / (l - min);
   1.979 +	}
   1.980 +    }
   1.981 +    if (max > a)
   1.982 +    {
   1.983 +	if (max - l == 0.0)
   1.984 +	{
   1.985 +	    tmp[0] = a;
   1.986 +	    tmp[1] = a;
   1.987 +	    tmp[2] = a;
   1.988 +	}
   1.989 +	else
   1.990 +	{
   1.991 +	    tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l);
   1.992 +	    tmp[1] = l + (tmp[1] - l) * (a - l) / (max - l);
   1.993 +	    tmp[2] = l + (tmp[2] - l) * (a - l) / (max - l);
   1.994 +	}
   1.995 +    }
   1.996 +    /* Back to the integer scale; the + 0.5 rounds because the conversion to uint64_t truncates. */
   1.997 +    dest[0] = tmp[0] * MASK + 0.5;
   1.998 +    dest[1] = tmp[1] * MASK + 0.5;
   1.999 +    dest[2] = tmp[2] * MASK + 0.5;
  1.1000 +}
  1.1001 +/* set_sat: rescales dest in place so that its largest channel becomes sat, its smallest becomes 0 and the middle one is interpolated between them. The channel ordering is decided from src while the values are read from dest; every caller in this file passes the same array for both. If the largest and smallest values are equal, all three channels are set to 0. */
  1.1002 +static void
  1.1003 +set_sat (uint64_t dest[3], uint64_t src[3], uint64_t sat)
  1.1004 +{
  1.1005 +    int id[3];
  1.1006 +    uint64_t min, max;
  1.1007 +    /* Sort the channel indices by src: id[0] = largest, id[1] = middle, id[2] = smallest. */
  1.1008 +    if (src[0] > src[1])
  1.1009 +    {
  1.1010 +	if (src[0] > src[2])
  1.1011 +	{
  1.1012 +	    id[0] = 0;
  1.1013 +	    if (src[1] > src[2])
  1.1014 +	    {
  1.1015 +		id[1] = 1;
  1.1016 +		id[2] = 2;
  1.1017 +	    }
  1.1018 +	    else
  1.1019 +	    {
  1.1020 +		id[1] = 2;
  1.1021 +		id[2] = 1;
  1.1022 +	    }
  1.1023 +	}
  1.1024 +	else
  1.1025 +	{
  1.1026 +	    id[0] = 2;
  1.1027 +	    id[1] = 0;
  1.1028 +	    id[2] = 1;
  1.1029 +	}
  1.1030 +    }
  1.1031 +    else
  1.1032 +    {
  1.1033 +	if (src[0] > src[2])
  1.1034 +	{
  1.1035 +	    id[0] = 1;
  1.1036 +	    id[1] = 0;
  1.1037 +	    id[2] = 2;
  1.1038 +	}
  1.1039 +	else
  1.1040 +	{
  1.1041 +	    id[2] = 0;
  1.1042 +	    if (src[1] > src[2])
  1.1043 +	    {
  1.1044 +		id[0] = 1;
  1.1045 +		id[1] = 2;
  1.1046 +	    }
  1.1047 +	    else
  1.1048 +	    {
  1.1049 +		id[0] = 2;
  1.1050 +		id[1] = 1;
  1.1051 +	    }
  1.1052 +	}
  1.1053 +    }
  1.1054 +    /* The values come from dest; only the ordering came from src. */
  1.1055 +    max = dest[id[0]];
  1.1056 +    min = dest[id[2]];
  1.1057 +    if (max > min)
  1.1058 +    {
  1.1059 +	dest[id[1]] = (dest[id[1]] - min) * sat / (max - min);
  1.1060 +	dest[id[0]] = sat;
  1.1061 +	dest[id[2]] = 0;
  1.1062 +    }
  1.1063 +    else
  1.1064 +    {
  1.1065 +	dest[0] = dest[1] = dest[2] = 0;
  1.1066 +    }
  1.1067 +}
  1.1068 +
  1.1069 +/* blend_hsl_hue: writes the premultiplied Hue blend of source sc (alpha sa) over destination dc (alpha da) into c.
  1.1070 + * Hue:
  1.1071 + * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb))
  1.1072 + */
  1.1073 +static inline void
  1.1074 +blend_hsl_hue (uint64_t c[3],
  1.1075 +               uint64_t dc[3],
  1.1076 +               uint64_t da,
  1.1077 +               uint64_t sc[3],
  1.1078 +               uint64_t sa)
  1.1079 +{
  1.1080 +    c[0] = sc[0] * da;
  1.1081 +    c[1] = sc[1] * da;
  1.1082 +    c[2] = sc[2] * da;
  1.1083 +    set_sat (c, c, SAT (dc) * sa); /* a_s * SAT (D) */
  1.1084 +    set_lum (c, c, sa * da, LUM (dc) * sa); /* alpha a_s * a_d, luminosity a_s * LUM (D) */
  1.1085 +}
  1.1086 +/* Generate combine_hsl_hue_u from blend_hsl_hue. */
  1.1087 +PDF_NON_SEPARABLE_BLEND_MODE (hsl_hue)
  1.1088 +
  1.1089 +/* blend_hsl_saturation: writes the premultiplied Saturation blend of source sc (alpha sa) over destination dc (alpha da) into c.
  1.1090 + * Saturation:
  1.1091 + * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb))
  1.1092 + */
  1.1093 +static inline void
  1.1094 +blend_hsl_saturation (uint64_t c[3],
  1.1095 +                      uint64_t dc[3],
  1.1096 +                      uint64_t da,
  1.1097 +                      uint64_t sc[3],
  1.1098 +                      uint64_t sa)
  1.1099 +{
  1.1100 +    c[0] = dc[0] * sa;
  1.1101 +    c[1] = dc[1] * sa;
  1.1102 +    c[2] = dc[2] * sa;
  1.1103 +    set_sat (c, c, SAT (sc) * da); /* a_d * SAT (S) */
  1.1104 +    set_lum (c, c, sa * da, LUM (dc) * sa); /* alpha a_s * a_d, luminosity a_s * LUM (D) */
  1.1105 +}
  1.1106 +/* Generate combine_hsl_saturation_u from blend_hsl_saturation. */
  1.1107 +PDF_NON_SEPARABLE_BLEND_MODE (hsl_saturation)
  1.1108 +
  1.1109 +/* blend_hsl_color: writes the premultiplied Color blend of source sc (alpha sa) over destination dc (alpha da) into c.
  1.1110 + * Color:
  1.1111 + * B(Cb, Cs) = set_lum (Cs, LUM (Cb))
  1.1112 + */
  1.1113 +static inline void
  1.1114 +blend_hsl_color (uint64_t c[3],
  1.1115 +                 uint64_t dc[3],
  1.1116 +                 uint64_t da,
  1.1117 +                 uint64_t sc[3],
  1.1118 +                 uint64_t sa)
  1.1119 +{
  1.1120 +    c[0] = sc[0] * da;
  1.1121 +    c[1] = sc[1] * da;
  1.1122 +    c[2] = sc[2] * da;
  1.1123 +    set_lum (c, c, sa * da, LUM (dc) * sa); /* alpha a_s * a_d, luminosity a_s * LUM (D) */
  1.1124 +}
  1.1125 +/* Generate combine_hsl_color_u from blend_hsl_color. */
  1.1126 +PDF_NON_SEPARABLE_BLEND_MODE (hsl_color)
  1.1127 +
  1.1128 +/* blend_hsl_luminosity: writes the premultiplied Luminosity blend of source sc (alpha sa) over destination dc (alpha da) into c.
  1.1129 + * Luminosity:
  1.1130 + * B(Cb, Cs) = set_lum (Cb, LUM (Cs))
  1.1131 + */
  1.1132 +static inline void
  1.1133 +blend_hsl_luminosity (uint64_t c[3],
  1.1134 +                      uint64_t dc[3],
  1.1135 +                      uint64_t da,
  1.1136 +                      uint64_t sc[3],
  1.1137 +                      uint64_t sa)
  1.1138 +{
  1.1139 +    c[0] = dc[0] * sa;
  1.1140 +    c[1] = dc[1] * sa;
  1.1141 +    c[2] = dc[2] * sa;
  1.1142 +    set_lum (c, c, sa * da, LUM (sc) * da); /* alpha a_s * a_d, luminosity a_d * LUM (S) */
  1.1143 +}
  1.1144 +/* Generate combine_hsl_luminosity_u from blend_hsl_luminosity. */
  1.1145 +PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity)
  1.1146 +/* The non-separable blend helper macros are not needed past this point. */
  1.1147 +#undef SAT
  1.1148 +#undef LUM
  1.1149 +#undef CH_MAX
  1.1150 +#undef CH_MIN
  1.1151 +#undef PDF_NON_SEPARABLE_BLEND_MODE
  1.1152 +
  1.1153 +/* All of the disjoint/conjoint composing functions
  1.1154 + *
  1.1155 + * The four entries in the first column indicate what source contributions
  1.1156 + * come from each of the four areas of the picture -- areas covered by neither
  1.1157 + * A nor B, areas covered only by A, areas covered only by B and finally
  1.1158 + * areas covered by both A and B.
  1.1159 + * 
  1.1160 + * Disjoint			Conjoint
  1.1161 + * Fa		Fb		Fa		Fb
  1.1162 + * (0,0,0,0)	0		0		0		0
  1.1163 + * (0,A,0,A)	1		0		1		0
  1.1164 + * (0,0,B,B)	0		1		0		1
  1.1165 + * (0,A,B,A)	1		min((1-a)/b,1)	1		max(1-a/b,0)
  1.1166 + * (0,A,B,B)	min((1-b)/a,1)	1		max(1-b/a,0)	1
  1.1167 + * (0,0,0,A)	max(1-(1-b)/a,0) 0		min(1,b/a)	0
  1.1168 + * (0,0,0,B)	0		max(1-(1-a)/b,0) 0		min(a/b,1)
  1.1169 + * (0,A,0,0)	min(1,(1-b)/a)	0		max(1-b/a,0)	0
  1.1170 + * (0,0,B,0)	0		min(1,(1-a)/b)	0		max(1-a/b,0)
  1.1171 + * (0,0,B,A)	max(1-(1-b)/a,0) min(1,(1-a)/b)	 min(1,b/a)	max(1-a/b,0)
  1.1172 + * (0,A,0,B)	min(1,(1-b)/a)	max(1-(1-a)/b,0) max(1-b/a,0)	min(1,a/b)
  1.1173 + * (0,A,B,0)	min(1,(1-b)/a)	min(1,(1-a)/b)	max(1-b/a,0)	max(1-a/b,0)
  1.1174 + *
  1.1175 + * See  http://marc.info/?l=xfree-render&m=99792000027857&w=2  for more
  1.1176 + * information about these operators.
  1.1177 + */
  1.1178 +/* Region flags. In the *_general_u functions the A bits select Fa (applied to the source) and the B bits select Fb (applied to the destination); OUT is the part outside the other operand, IN the part overlapping it. */
  1.1179 +#define COMBINE_A_OUT 1
  1.1180 +#define COMBINE_A_IN  2
  1.1181 +#define COMBINE_B_OUT 4
  1.1182 +#define COMBINE_B_IN  8
  1.1183 +/* Operators expressed as unions of the region flags. */
  1.1184 +#define COMBINE_CLEAR   0
  1.1185 +#define COMBINE_A       (COMBINE_A_OUT | COMBINE_A_IN)
  1.1186 +#define COMBINE_B       (COMBINE_B_OUT | COMBINE_B_IN)
  1.1187 +#define COMBINE_A_OVER  (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_A_IN)
  1.1188 +#define COMBINE_B_OVER  (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_B_IN)
  1.1189 +#define COMBINE_A_ATOP  (COMBINE_B_OUT | COMBINE_A_IN)
  1.1190 +#define COMBINE_B_ATOP  (COMBINE_A_OUT | COMBINE_B_IN)
  1.1191 +#define COMBINE_XOR     (COMBINE_A_OUT | COMBINE_B_OUT)
  1.1192 +
  1.1193 +/* Disjoint case: portion covered by a but not b, returned as a 16-bit fraction in which MASK stands for 1. */
  1.1194 +static uint16_t
  1.1195 +combine_disjoint_out_part (uint16_t a, uint16_t b)
  1.1196 +{
  1.1197 +    /* min (1, (1-b) / a) */
  1.1198 +    /* a == 0 always takes the early return below, so DIV_UN16 is never called with a zero divisor. */
  1.1199 +    b = ~b;                 /* 1 - b */
  1.1200 +    if (b >= a)             /* 1 - b >= a -> (1-b)/a >= 1 */
  1.1201 +	return MASK;        /* 1 */
  1.1202 +    return DIV_UN16 (b, a);     /* (1-b) / a */
  1.1203 +}
  1.1204 +
  1.1205 +/* Disjoint case: portion covered by both a and b, returned as a 16-bit fraction; it is the bitwise complement of what combine_disjoint_out_part returns for the same arguments. */
  1.1206 +static uint16_t
  1.1207 +combine_disjoint_in_part (uint16_t a, uint16_t b)
  1.1208 +{
  1.1209 +    /* max (1-(1-b)/a,0) */
  1.1210 +    /*  = - min ((1-b)/a - 1, 0) */
  1.1211 +    /*  = 1 - min (1, (1-b)/a) */
  1.1212 +    /* a == 0 always takes the early return below, so DIV_UN16 is never called with a zero divisor. */
  1.1213 +    b = ~b;                 /* 1 - b */
  1.1214 +    if (b >= a)             /* 1 - b >= a -> (1-b)/a >= 1 */
  1.1215 +	return 0;           /* 1 - 1 */
  1.1216 +    return ~DIV_UN16(b, a);    /* 1 - (1-b) / a */
  1.1217 +}
  1.1218 +
  1.1219 +/* Conjoint case: portion covered by a but not b, returned as a 16-bit fraction in which MASK stands for 1. */
  1.1220 +static uint16_t
  1.1221 +combine_conjoint_out_part (uint16_t a, uint16_t b)
  1.1222 +{
  1.1223 +    /* max (1-b/a,0) */
  1.1224 +    /* = 1-min(b/a,1) */
  1.1225 +
  1.1226 +    /* a == 0 implies b >= a, so the division below never sees a zero divisor */
  1.1227 +
  1.1228 +    if (b >= a)             /* b >= a -> b/a >= 1 */
  1.1229 +	return 0x00;        /* 0 */
  1.1230 +    return ~DIV_UN16(b, a);    /* 1 - b/a */
  1.1231 +}
  1.1232 +
  1.1233 +/* Conjoint case: portion covered by both a and b, returned as a 16-bit fraction in which MASK stands for 1. */
  1.1234 +static uint16_t
  1.1235 +combine_conjoint_in_part (uint16_t a, uint16_t b)
  1.1236 +{
  1.1237 +    /* min (1,b/a) */
  1.1238 +    /* a == 0 implies b >= a, so the division below never sees a zero divisor */
  1.1239 +    if (b >= a)             /* b >= a -> b/a >= 1 */
  1.1240 +	return MASK;        /* 1 */
  1.1241 +    return DIV_UN16 (b, a);     /* b/a */
  1.1242 +}
  1.1243 +/* GET_COMP: the 16-bit component of v found at bit offset i, widened to uint32_t. */
  1.1244 +#define GET_COMP(v, i)   ((uint32_t) (uint16_t) ((v) >> i))
  1.1245 +/* ADD: component i of x plus component i of y, clamped to 16 bits and shifted back to offset i; t is a scratch lvalue. The clamp relies on (t) >> G_SHIFT being nonzero exactly when the sum overflows, i.e. on G_SHIFT equalling the component width -- TODO confirm against the header. */
  1.1246 +#define ADD(x, y, i, t)							\
  1.1247 +    ((t) = GET_COMP (x, i) + GET_COMP (y, i),				\
  1.1248 +     (uint64_t) ((uint16_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i))
  1.1249 +/* GENERIC: like ADD, but sums component i of y scaled by ay and component i of x scaled by ax (both through MUL_UN16); t, u and v are scratch lvalues. */
  1.1250 +#define GENERIC(x, y, i, ax, ay, t, u, v)				\
  1.1251 +    ((t) = (MUL_UN16 (GET_COMP (y, i), ay, (u)) +			\
  1.1252 +            MUL_UN16 (GET_COMP (x, i), ax, (v))),			\
  1.1253 +     (uint64_t) ((uint16_t) ((t) |					\
  1.1254 +                           (0 - ((t) >> G_SHIFT)))) << (i))
  1.1255 +/* combine_disjoint_general_u: for each of width pixels, takes s = combine_mask (src, mask, i) and d = dest[i] and stores the per-component clamped sum s * Fa + d * Fb (via GENERIC) back into dest[i]. Fa and Fb are chosen by the COMBINE_A and COMBINE_B bits of combine, using the disjoint *_part helpers on the two alphas. */
  1.1256 +static void
  1.1257 +combine_disjoint_general_u (uint64_t *      dest,
  1.1258 +                            const uint64_t *src,
  1.1259 +                            const uint64_t *mask,
  1.1260 +                            int            width,
  1.1261 +                            uint16_t        combine)
  1.1262 +{
  1.1263 +    int i;
  1.1264 +
  1.1265 +    for (i = 0; i < width; ++i)
  1.1266 +    {
  1.1267 +	uint64_t s = combine_mask (src, mask, i);
  1.1268 +	uint64_t d = *(dest + i);
  1.1269 +	uint64_t m, n, o, p;
  1.1270 +	uint32_t Fa, Fb, t, u, v;
  1.1271 +	uint16_t sa = s >> A_SHIFT;
  1.1272 +	uint16_t da = d >> A_SHIFT;
  1.1273 +	/* Fa: factor applied to the source, selected by the A bits of combine. */
  1.1274 +	switch (combine & COMBINE_A)
  1.1275 +	{
  1.1276 +	default:
  1.1277 +	    Fa = 0;
  1.1278 +	    break;
  1.1279 +
  1.1280 +	case COMBINE_A_OUT:
  1.1281 +	    Fa = combine_disjoint_out_part (sa, da);
  1.1282 +	    break;
  1.1283 +
  1.1284 +	case COMBINE_A_IN:
  1.1285 +	    Fa = combine_disjoint_in_part (sa, da);
  1.1286 +	    break;
  1.1287 +
  1.1288 +	case COMBINE_A:
  1.1289 +	    Fa = MASK;
  1.1290 +	    break;
  1.1291 +	}
  1.1292 +	/* Fb: factor applied to the destination, selected by the B bits of combine. */
  1.1293 +	switch (combine & COMBINE_B)
  1.1294 +	{
  1.1295 +	default:
  1.1296 +	    Fb = 0;
  1.1297 +	    break;
  1.1298 +
  1.1299 +	case COMBINE_B_OUT:
  1.1300 +	    Fb = combine_disjoint_out_part (da, sa);
  1.1301 +	    break;
  1.1302 +
  1.1303 +	case COMBINE_B_IN:
  1.1304 +	    Fb = combine_disjoint_in_part (da, sa);
  1.1305 +	    break;
  1.1306 +
  1.1307 +	case COMBINE_B:
  1.1308 +	    Fb = MASK;
  1.1309 +	    break;
  1.1310 +	}
  1.1311 +	m = GENERIC (s, d, 0, Fa, Fb, t, u, v);
  1.1312 +	n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v);
  1.1313 +	o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v);
  1.1314 +	p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v);
  1.1315 +	s = m | n | o | p;
  1.1316 +	*(dest + i) = s;
  1.1317 +    }
  1.1318 +}
  1.1319 +/* Disjoint OVER: for each pixel whose masked source s is nonzero, scales dest by combine_disjoint_out_part (dest alpha, source alpha) and adds s (presumably d = d * a + s, going by the macro name). Pixels with s == 0 are left untouched. imp and op are not used. */
  1.1320 +static void
  1.1321 +combine_disjoint_over_u (pixman_implementation_t *imp,
  1.1322 +                         pixman_op_t              op,
  1.1323 +                         uint64_t *                dest,
  1.1324 +                         const uint64_t *          src,
  1.1325 +                         const uint64_t *          mask,
  1.1326 +                         int                      width)
  1.1327 +{
  1.1328 +    int i;
  1.1329 +
  1.1330 +    for (i = 0; i < width; ++i)
  1.1331 +    {
  1.1332 +	uint64_t s = combine_mask (src, mask, i);
  1.1333 +	uint32_t a = s >> A_SHIFT;
  1.1334 +
  1.1335 +	if (s != 0x00)
  1.1336 +	{
  1.1337 +	    uint64_t d = *(dest + i);
  1.1338 +	    a = combine_disjoint_out_part (d >> A_SHIFT, a);
  1.1339 +	    UN16x4_MUL_UN16_ADD_UN16x4 (d, a, s);
  1.1340 +
  1.1341 +	    *(dest + i) = d;
  1.1342 +	}
  1.1343 +    }
  1.1344 +}
  1.1345 +/* Disjoint IN: forwards to combine_disjoint_general_u with COMBINE_A_IN; imp and op are not used. */
  1.1346 +static void
  1.1347 +combine_disjoint_in_u (pixman_implementation_t *imp,
  1.1348 +                       pixman_op_t              op,
  1.1349 +                       uint64_t *                dest,
  1.1350 +                       const uint64_t *          src,
  1.1351 +                       const uint64_t *          mask,
  1.1352 +                       int                      width)
  1.1353 +{
  1.1354 +    combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
  1.1355 +}
  1.1356 +/* Disjoint IN_REVERSE: forwards to combine_disjoint_general_u with COMBINE_B_IN; imp and op are not used. */
  1.1357 +static void
  1.1358 +combine_disjoint_in_reverse_u (pixman_implementation_t *imp,
  1.1359 +                               pixman_op_t              op,
  1.1360 +                               uint64_t *                dest,
  1.1361 +                               const uint64_t *          src,
  1.1362 +                               const uint64_t *          mask,
  1.1363 +                               int                      width)
  1.1364 +{
  1.1365 +    combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
  1.1366 +}
  1.1367 +/* Disjoint OUT: forwards to combine_disjoint_general_u with COMBINE_A_OUT; imp and op are not used. */
  1.1368 +static void
  1.1369 +combine_disjoint_out_u (pixman_implementation_t *imp,
  1.1370 +                        pixman_op_t              op,
  1.1371 +                        uint64_t *                dest,
  1.1372 +                        const uint64_t *          src,
  1.1373 +                        const uint64_t *          mask,
  1.1374 +                        int                      width)
  1.1375 +{
  1.1376 +    combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
  1.1377 +}
  1.1378 +/* Disjoint OUT_REVERSE: forwards to combine_disjoint_general_u with COMBINE_B_OUT; imp and op are not used. */
  1.1379 +static void
  1.1380 +combine_disjoint_out_reverse_u (pixman_implementation_t *imp,
  1.1381 +                                pixman_op_t              op,
  1.1382 +                                uint64_t *                dest,
  1.1383 +                                const uint64_t *          src,
  1.1384 +                                const uint64_t *          mask,
  1.1385 +                                int                      width)
  1.1386 +{
  1.1387 +    combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
  1.1388 +}
  1.1389 +/* Disjoint ATOP: forwards to combine_disjoint_general_u with COMBINE_A_ATOP; imp and op are not used. */
  1.1390 +static void
  1.1391 +combine_disjoint_atop_u (pixman_implementation_t *imp,
  1.1392 +                         pixman_op_t              op,
  1.1393 +                         uint64_t *                dest,
  1.1394 +                         const uint64_t *          src,
  1.1395 +                         const uint64_t *          mask,
  1.1396 +                         int                      width)
  1.1397 +{
  1.1398 +    combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
  1.1399 +}
  1.1400 +/* Disjoint ATOP_REVERSE: forwards to combine_disjoint_general_u with COMBINE_B_ATOP; imp and op are not used. */
  1.1401 +static void
  1.1402 +combine_disjoint_atop_reverse_u (pixman_implementation_t *imp,
  1.1403 +                                 pixman_op_t              op,
  1.1404 +                                 uint64_t *                dest,
  1.1405 +                                 const uint64_t *          src,
  1.1406 +                                 const uint64_t *          mask,
  1.1407 +                                 int                      width)
  1.1408 +{
  1.1409 +    combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
  1.1410 +}
  1.1411 +/* Disjoint XOR: forwards to combine_disjoint_general_u with COMBINE_XOR; imp and op are not used. */
  1.1412 +static void
  1.1413 +combine_disjoint_xor_u (pixman_implementation_t *imp,
  1.1414 +                        pixman_op_t              op,
  1.1415 +                        uint64_t *                dest,
  1.1416 +                        const uint64_t *          src,
  1.1417 +                        const uint64_t *          mask,
  1.1418 +                        int                      width)
  1.1419 +{
  1.1420 +    combine_disjoint_general_u (dest, src, mask, width, COMBINE_XOR);
  1.1421 +}
  1.1422 +/* combine_conjoint_general_u: for each of width pixels, takes s = combine_mask (src, mask, i) and d = dest[i] and stores the per-component clamped sum s * Fa + d * Fb (via GENERIC) back into dest[i]. Fa and Fb are chosen by the COMBINE_A and COMBINE_B bits of combine, using the conjoint *_part helpers on the two alphas. */
  1.1423 +static void
  1.1424 +combine_conjoint_general_u (uint64_t *      dest,
  1.1425 +                            const uint64_t *src,
  1.1426 +                            const uint64_t *mask,
  1.1427 +                            int            width,
  1.1428 +                            uint16_t        combine)
  1.1429 +{
  1.1430 +    int i;
  1.1431 +
  1.1432 +    for (i = 0; i < width; ++i)
  1.1433 +    {
  1.1434 +	uint64_t s = combine_mask (src, mask, i);
  1.1435 +	uint64_t d = *(dest + i);
  1.1436 +	uint64_t m, n, o, p;
  1.1437 +	uint32_t Fa, Fb, t, u, v;
  1.1438 +	uint16_t sa = s >> A_SHIFT;
  1.1439 +	uint16_t da = d >> A_SHIFT;
  1.1440 +	/* Fa: factor applied to the source, selected by the A bits of combine. */
  1.1441 +	switch (combine & COMBINE_A)
  1.1442 +	{
  1.1443 +	default:
  1.1444 +	    Fa = 0;
  1.1445 +	    break;
  1.1446 +
  1.1447 +	case COMBINE_A_OUT:
  1.1448 +	    Fa = combine_conjoint_out_part (sa, da);
  1.1449 +	    break;
  1.1450 +
  1.1451 +	case COMBINE_A_IN:
  1.1452 +	    Fa = combine_conjoint_in_part (sa, da);
  1.1453 +	    break;
  1.1454 +
  1.1455 +	case COMBINE_A:
  1.1456 +	    Fa = MASK;
  1.1457 +	    break;
  1.1458 +	}
  1.1459 +	/* Fb: factor applied to the destination, selected by the B bits of combine. */
  1.1460 +	switch (combine & COMBINE_B)
  1.1461 +	{
  1.1462 +	default:
  1.1463 +	    Fb = 0;
  1.1464 +	    break;
  1.1465 +
  1.1466 +	case COMBINE_B_OUT:
  1.1467 +	    Fb = combine_conjoint_out_part (da, sa);
  1.1468 +	    break;
  1.1469 +
  1.1470 +	case COMBINE_B_IN:
  1.1471 +	    Fb = combine_conjoint_in_part (da, sa);
  1.1472 +	    break;
  1.1473 +
  1.1474 +	case COMBINE_B:
  1.1475 +	    Fb = MASK;
  1.1476 +	    break;
  1.1477 +	}
  1.1478 +
  1.1479 +	m = GENERIC (s, d, 0, Fa, Fb, t, u, v);
  1.1480 +	n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v);
  1.1481 +	o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v);
  1.1482 +	p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v);
  1.1483 +
  1.1484 +	s = m | n | o | p;
  1.1485 +
  1.1486 +	*(dest + i) = s;
  1.1487 +    }
  1.1488 +}
  1.1489 +/* Conjoint OVER: forwards to combine_conjoint_general_u with COMBINE_A_OVER; imp and op are not used. */
  1.1490 +static void
  1.1491 +combine_conjoint_over_u (pixman_implementation_t *imp,
  1.1492 +                         pixman_op_t              op,
  1.1493 +                         uint64_t *                dest,
  1.1494 +                         const uint64_t *          src,
  1.1495 +                         const uint64_t *          mask,
  1.1496 +                         int                      width)
  1.1497 +{
  1.1498 +    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OVER);
  1.1499 +}
  1.1500 +/* Conjoint OVER_REVERSE: forwards to combine_conjoint_general_u with COMBINE_B_OVER; imp and op are not used. */
  1.1501 +static void
  1.1502 +combine_conjoint_over_reverse_u (pixman_implementation_t *imp,
  1.1503 +                                 pixman_op_t              op,
  1.1504 +                                 uint64_t *                dest,
  1.1505 +                                 const uint64_t *          src,
  1.1506 +                                 const uint64_t *          mask,
  1.1507 +                                 int                      width)
  1.1508 +{
  1.1509 +    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OVER);
  1.1510 +}
  1.1511 +/* Conjoint IN: forwards to combine_conjoint_general_u with COMBINE_A_IN; imp and op are not used. */
  1.1512 +static void
  1.1513 +combine_conjoint_in_u (pixman_implementation_t *imp,
  1.1514 +                       pixman_op_t              op,
  1.1515 +                       uint64_t *                dest,
  1.1516 +                       const uint64_t *          src,
  1.1517 +                       const uint64_t *          mask,
  1.1518 +                       int                      width)
  1.1519 +{
  1.1520 +    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
  1.1521 +}
  1.1522 +/* Conjoint IN_REVERSE: forwards to combine_conjoint_general_u with COMBINE_B_IN; imp and op are not used. */
  1.1523 +static void
  1.1524 +combine_conjoint_in_reverse_u (pixman_implementation_t *imp,
  1.1525 +                               pixman_op_t              op,
  1.1526 +                               uint64_t *                dest,
  1.1527 +                               const uint64_t *          src,
  1.1528 +                               const uint64_t *          mask,
  1.1529 +                               int                      width)
  1.1530 +{
  1.1531 +    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
  1.1532 +}
  1.1533 +/* Conjoint OUT: forwards to combine_conjoint_general_u with COMBINE_A_OUT; imp and op are not used. */
  1.1534 +static void
  1.1535 +combine_conjoint_out_u (pixman_implementation_t *imp,
  1.1536 +                        pixman_op_t              op,
  1.1537 +                        uint64_t *                dest,
  1.1538 +                        const uint64_t *          src,
  1.1539 +                        const uint64_t *          mask,
  1.1540 +                        int                      width)
  1.1541 +{
  1.1542 +    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
  1.1543 +}
  1.1544 +/* Conjoint OUT_REVERSE: forwards to combine_conjoint_general_u with COMBINE_B_OUT; imp and op are not used. */
  1.1545 +static void
  1.1546 +combine_conjoint_out_reverse_u (pixman_implementation_t *imp,
  1.1547 +                                pixman_op_t              op,
  1.1548 +                                uint64_t *                dest,
  1.1549 +                                const uint64_t *          src,
  1.1550 +                                const uint64_t *          mask,
  1.1551 +                                int                      width)
  1.1552 +{
  1.1553 +    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
  1.1554 +}
  1.1555 +/* Conjoint ATOP: forwards to combine_conjoint_general_u with COMBINE_A_ATOP; imp and op are not used. */
  1.1556 +static void
  1.1557 +combine_conjoint_atop_u (pixman_implementation_t *imp,
  1.1558 +                         pixman_op_t              op,
  1.1559 +                         uint64_t *                dest,
  1.1560 +                         const uint64_t *          src,
  1.1561 +                         const uint64_t *          mask,
  1.1562 +                         int                      width)
  1.1563 +{
  1.1564 +    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
  1.1565 +}
  1.1566 +/* Conjoint ATOP_REVERSE: forwards to combine_conjoint_general_u with COMBINE_B_ATOP; imp and op are not used. */
  1.1567 +static void
  1.1568 +combine_conjoint_atop_reverse_u (pixman_implementation_t *imp,
  1.1569 +                                 pixman_op_t              op,
  1.1570 +                                 uint64_t *                dest,
  1.1571 +                                 const uint64_t *          src,
  1.1572 +                                 const uint64_t *          mask,
  1.1573 +                                 int                      width)
  1.1574 +{
  1.1575 +    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
  1.1576 +}
  1.1577 +/* Conjoint XOR: forwards to combine_conjoint_general_u with COMBINE_XOR; imp and op are not used. */
  1.1578 +static void
  1.1579 +combine_conjoint_xor_u (pixman_implementation_t *imp,
  1.1580 +                        pixman_op_t              op,
  1.1581 +                        uint64_t *                dest,
  1.1582 +                        const uint64_t *          src,
  1.1583 +                        const uint64_t *          mask,
  1.1584 +                        int                      width)
  1.1585 +{
  1.1586 +    combine_conjoint_general_u (dest, src, mask, width, COMBINE_XOR);
  1.1587 +}
  1.1588 +
  1.1589 +/************************************************************************/
  1.1590 +/*********************** Per Channel functions **************************/
  1.1591 +/************************************************************************/
  1.1592 +/* Component-alpha CLEAR: zeroes width pixels of dest; imp, op, src and mask are not used. */
  1.1593 +static void
  1.1594 +combine_clear_ca (pixman_implementation_t *imp,
  1.1595 +                  pixman_op_t              op,
  1.1596 +                  uint64_t *                dest,
  1.1597 +                  const uint64_t *          src,
  1.1598 +                  const uint64_t *          mask,
  1.1599 +                  int                      width)
  1.1600 +{
  1.1601 +    memset (dest, 0, width * sizeof(uint64_t));
  1.1602 +}
  1.1603 +/* Component-alpha SRC: dest[i] = src[i] scaled per component by mask[i] (combine_mask_value_ca); the previous dest value is not read. imp and op are not used. */
  1.1604 +static void
  1.1605 +combine_src_ca (pixman_implementation_t *imp,
  1.1606 +                pixman_op_t              op,
  1.1607 +                uint64_t *                dest,
  1.1608 +                const uint64_t *          src,
  1.1609 +                const uint64_t *          mask,
  1.1610 +                int                      width)
  1.1611 +{
  1.1612 +    int i;
  1.1613 +
  1.1614 +    for (i = 0; i < width; ++i)
  1.1615 +    {
  1.1616 +	uint64_t s = *(src + i);
  1.1617 +	uint64_t m = *(mask + i);
  1.1618 +
  1.1619 +	combine_mask_value_ca (&s, &m);
  1.1620 +
  1.1621 +	*(dest + i) = s;
  1.1622 +    }
  1.1623 +}
  1.1624 +/* Component-alpha OVER: after combine_mask_ca, s is src * mask and m is mask * source alpha (per component); dest becomes dest * ~m + s (going by the macro name), or just s when ~m is 0. imp and op are not used. */
  1.1625 +static void
  1.1626 +combine_over_ca (pixman_implementation_t *imp,
  1.1627 +                 pixman_op_t              op,
  1.1628 +                 uint64_t *                dest,
  1.1629 +                 const uint64_t *          src,
  1.1630 +                 const uint64_t *          mask,
  1.1631 +                 int                      width)
  1.1632 +{
  1.1633 +    int i;
  1.1634 +
  1.1635 +    for (i = 0; i < width; ++i)
  1.1636 +    {
  1.1637 +	uint64_t s = *(src + i);
  1.1638 +	uint64_t m = *(mask + i);
  1.1639 +	uint64_t a;
  1.1640 +
  1.1641 +	combine_mask_ca (&s, &m);
  1.1642 +	/* a holds the per-component complement of the effective alpha. */
  1.1643 +	a = ~m;
  1.1644 +	if (a)
  1.1645 +	{
  1.1646 +	    uint64_t d = *(dest + i);
  1.1647 +	    UN16x4_MUL_UN16x4_ADD_UN16x4 (d, a, s);
  1.1648 +	    s = d;
  1.1649 +	}
  1.1650 +
  1.1651 +	*(dest + i) = s;
  1.1652 +    }
  1.1653 +}
  1.1654 +/* Component-alpha OVER_REVERSE: with a = complement of the destination alpha, dest becomes (src * mask) * a + dest (going by the macro names). Pixels with a == 0 are left untouched. imp and op are not used. */
  1.1655 +static void
  1.1656 +combine_over_reverse_ca (pixman_implementation_t *imp,
  1.1657 +                         pixman_op_t              op,
  1.1658 +                         uint64_t *                dest,
  1.1659 +                         const uint64_t *          src,
  1.1660 +                         const uint64_t *          mask,
  1.1661 +                         int                      width)
  1.1662 +{
  1.1663 +    int i;
  1.1664 +
  1.1665 +    for (i = 0; i < width; ++i)
  1.1666 +    {
  1.1667 +	uint64_t d = *(dest + i);
  1.1668 +	uint64_t a = ~d >> A_SHIFT;
  1.1669 +	/* a == 0 means the destination alpha is already at its maximum; nothing to add. */
  1.1670 +	if (a)
  1.1671 +	{
  1.1672 +	    uint64_t s = *(src + i);
  1.1673 +	    uint64_t m = *(mask + i);
  1.1674 +
  1.1675 +	    UN16x4_MUL_UN16x4 (s, m);
  1.1676 +	    UN16x4_MUL_UN16_ADD_UN16x4 (s, a, d);
  1.1677 +
  1.1678 +	    *(dest + i) = s;
  1.1679 +	}
  1.1680 +    }
  1.1681 +}
  1.1682 +/* Component-alpha IN: dest becomes (src * mask) scaled by the destination alpha a; 0 when a is 0, and the multiply is skipped when a == MASK. imp and op are not used. */
  1.1683 +static void
  1.1684 +combine_in_ca (pixman_implementation_t *imp,
  1.1685 +               pixman_op_t              op,
  1.1686 +               uint64_t *                dest,
  1.1687 +               const uint64_t *          src,
  1.1688 +               const uint64_t *          mask,
  1.1689 +               int                      width)
  1.1690 +{
  1.1691 +    int i;
  1.1692 +
  1.1693 +    for (i = 0; i < width; ++i)
  1.1694 +    {
  1.1695 +	uint64_t d = *(dest + i);
  1.1696 +	uint32_t a = d >> A_SHIFT;
  1.1697 +	uint64_t s = 0;
  1.1698 +
  1.1699 +	if (a)
  1.1700 +	{
  1.1701 +	    uint64_t m = *(mask + i);
  1.1702 +
  1.1703 +	    s = *(src + i);
  1.1704 +	    combine_mask_value_ca (&s, &m);
  1.1705 +
  1.1706 +	    if (a != MASK)
  1.1707 +		UN16x4_MUL_UN16 (s, a);
  1.1708 +	}
  1.1709 +
  1.1710 +	*(dest + i) = s;
  1.1711 +    }
  1.1712 +}
  1.1713 +/* Component-alpha IN_REVERSE: dest is scaled per component by m as updated by combine_mask_alpha_ca (presumably mask * source alpha -- confirm against its definition). dest is left untouched when m is all ones and becomes 0 when m is 0. imp and op are not used. */
  1.1714 +static void
  1.1715 +combine_in_reverse_ca (pixman_implementation_t *imp,
  1.1716 +                       pixman_op_t              op,
  1.1717 +                       uint64_t *                dest,
  1.1718 +                       const uint64_t *          src,
  1.1719 +                       const uint64_t *          mask,
  1.1720 +                       int                      width)
  1.1721 +{
  1.1722 +    int i;
  1.1723 +
  1.1724 +    for (i = 0; i < width; ++i)
  1.1725 +    {
  1.1726 +	uint64_t s = *(src + i);
  1.1727 +	uint64_t m = *(mask + i);
  1.1728 +	uint64_t a;
  1.1729 +
  1.1730 +	combine_mask_alpha_ca (&s, &m);
  1.1731 +
  1.1732 +	a = m;
  1.1733 +	if (a != ~0)
  1.1734 +	{
  1.1735 +	    uint64_t d = 0;
  1.1736 +
  1.1737 +	    if (a)
  1.1738 +	    {
  1.1739 +		d = *(dest + i);
  1.1740 +		UN16x4_MUL_UN16x4 (d, a);
  1.1741 +	    }
  1.1742 +
  1.1743 +	    *(dest + i) = d;
  1.1744 +	}
  1.1745 +    }
  1.1746 +}
  1.1747 +/* Component-alpha OUT: dest becomes (src * mask) scaled by a, the complement of the destination alpha; 0 when a is 0, and the multiply is skipped when a == MASK. imp and op are not used. */
  1.1748 +static void
  1.1749 +combine_out_ca (pixman_implementation_t *imp,
  1.1750 +                pixman_op_t              op,
  1.1751 +                uint64_t *                dest,
  1.1752 +                const uint64_t *          src,
  1.1753 +                const uint64_t *          mask,
  1.1754 +                int                      width)
  1.1755 +{
  1.1756 +    int i;
  1.1757 +
  1.1758 +    for (i = 0; i < width; ++i)
  1.1759 +    {
  1.1760 +	uint64_t d = *(dest + i);
  1.1761 +	uint32_t a = ~d >> A_SHIFT;
  1.1762 +	uint64_t s = 0;
  1.1763 +
  1.1764 +	if (a)
  1.1765 +	{
  1.1766 +	    uint64_t m = *(mask + i);
  1.1767 +
  1.1768 +	    s = *(src + i);
  1.1769 +	    combine_mask_value_ca (&s, &m);
  1.1770 +
  1.1771 +	    if (a != MASK)
  1.1772 +		UN16x4_MUL_UN16 (s, a);
  1.1773 +	}
  1.1774 +
  1.1775 +	*(dest + i) = s;
  1.1776 +    }
  1.1777 +}
  1.1778 +/* Component-alpha OUT_REVERSE: dest is scaled per component by a = ~m, where m is the mask as updated by combine_mask_alpha_ca (presumably mask * source alpha -- confirm against its definition). dest is left untouched when a is all ones and becomes 0 when a is 0. imp and op are not used. */
  1.1779 +static void
  1.1780 +combine_out_reverse_ca (pixman_implementation_t *imp,
  1.1781 +                        pixman_op_t              op,
  1.1782 +                        uint64_t *                dest,
  1.1783 +                        const uint64_t *          src,
  1.1784 +                        const uint64_t *          mask,
  1.1785 +                        int                      width)
  1.1786 +{
  1.1787 +    int i;
  1.1788 +
  1.1789 +    for (i = 0; i < width; ++i)
  1.1790 +    {
  1.1791 +	uint64_t s = *(src + i);
  1.1792 +	uint64_t m = *(mask + i);
  1.1793 +	uint64_t a;
  1.1794 +
  1.1795 +	combine_mask_alpha_ca (&s, &m);
  1.1796 +
  1.1797 +	a = ~m;
  1.1798 +	if (a != ~0)
  1.1799 +	{
  1.1800 +	    uint64_t d = 0;
  1.1801 +
  1.1802 +	    if (a)
  1.1803 +	    {
  1.1804 +		d = *(dest + i);
  1.1805 +		UN16x4_MUL_UN16x4 (d, a);
  1.1806 +	    }
  1.1807 +
  1.1808 +	    *(dest + i) = d;
  1.1809 +	}
  1.1810 +    }
  1.1811 +}
  1.1812 +/* Component-alpha ATOP: per pixel, blends dest (weighted by ~mask) with masked src (weighted by dest >> A_SHIFT); imp and op are unused. */
  1.1813 +static void
  1.1814 +combine_atop_ca (pixman_implementation_t *imp,
  1.1815 +                 pixman_op_t              op,
  1.1816 +                 uint64_t *                dest,
  1.1817 +                 const uint64_t *          src,
  1.1818 +                 const uint64_t *          mask,
  1.1819 +                 int                      width)
  1.1820 +{
  1.1821 +    int i;
  1.1822 +
  1.1823 +    for (i = 0; i < width; ++i)
  1.1824 +    {
  1.1825 +	uint64_t d = *(dest + i);
  1.1826 +	uint64_t s = *(src + i);
  1.1827 +	uint64_t m = *(mask + i);
  1.1828 +	uint64_t ad; /* per-channel weight applied to d */
  1.1829 +	uint32_t as = d >> A_SHIFT; /* single weight applied to s: dest shifted down by A_SHIFT (presumably dest alpha) */
  1.1830 +
  1.1831 +	combine_mask_ca (&s, &m); /* rewrites both s and m in place (helper defined near the top of the file) */
  1.1832 +
  1.1833 +	ad = ~m;
  1.1834 +
  1.1835 +	UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16 (d, ad, s, as); /* presumably d = d * ad + s * as per channel; macro defined outside this view */
  1.1836 +
  1.1837 +	*(dest + i) = d;
  1.1838 +    }
  1.1839 +}
  1.1840 +/* Component-alpha ATOP_REVERSE: per pixel, blends dest (weighted by the adjusted mask) with masked src (weighted by ~dest >> A_SHIFT); imp and op are unused. */
  1.1841 +static void
  1.1842 +combine_atop_reverse_ca (pixman_implementation_t *imp,
  1.1843 +                         pixman_op_t              op,
  1.1844 +                         uint64_t *                dest,
  1.1845 +                         const uint64_t *          src,
  1.1846 +                         const uint64_t *          mask,
  1.1847 +                         int                      width)
  1.1848 +{
  1.1849 +    int i;
  1.1850 +
  1.1851 +    for (i = 0; i < width; ++i)
  1.1852 +    {
  1.1853 +	uint64_t d = *(dest + i);
  1.1854 +	uint64_t s = *(src + i);
  1.1855 +	uint64_t m = *(mask + i);
  1.1856 +	uint64_t ad; /* per-channel weight applied to d */
  1.1857 +	uint32_t as = ~d >> A_SHIFT; /* single weight applied to s: complemented dest shifted down by A_SHIFT */
  1.1858 +
  1.1859 +	combine_mask_ca (&s, &m); /* rewrites both s and m in place (helper defined near the top of the file) */
  1.1860 +
  1.1861 +	ad = m; /* unlike combine_atop_ca, the mask is used without complementing */
  1.1862 +
  1.1863 +	UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16 (d, ad, s, as); /* presumably d = d * ad + s * as per channel; macro defined outside this view */
  1.1864 +
  1.1865 +	*(dest + i) = d;
  1.1866 +    }
  1.1867 +}
  1.1868 +/* Component-alpha XOR: per pixel, blends dest (weighted by ~mask) with masked src (weighted by ~dest >> A_SHIFT); imp and op are unused. */
  1.1869 +static void
  1.1870 +combine_xor_ca (pixman_implementation_t *imp,
  1.1871 +                pixman_op_t              op,
  1.1872 +                uint64_t *                dest,
  1.1873 +                const uint64_t *          src,
  1.1874 +                const uint64_t *          mask,
  1.1875 +                int                      width)
  1.1876 +{
  1.1877 +    int i;
  1.1878 +
  1.1879 +    for (i = 0; i < width; ++i)
  1.1880 +    {
  1.1881 +	uint64_t d = *(dest + i);
  1.1882 +	uint64_t s = *(src + i);
  1.1883 +	uint64_t m = *(mask + i);
  1.1884 +	uint64_t ad; /* per-channel weight applied to d */
  1.1885 +	uint32_t as = ~d >> A_SHIFT; /* single weight applied to s: complemented dest shifted down by A_SHIFT */
  1.1886 +
  1.1887 +	combine_mask_ca (&s, &m); /* rewrites both s and m in place (helper defined near the top of the file) */
  1.1888 +
  1.1889 +	ad = ~m; /* both weights are complements here */
  1.1890 +
  1.1891 +	UN16x4_MUL_UN16x4_ADD_UN16x4_MUL_UN16 (d, ad, s, as); /* presumably d = d * ad + s * as per channel; macro defined outside this view */
  1.1892 +
  1.1893 +	*(dest + i) = d;
  1.1894 +    }
  1.1895 +}
  1.1896 +/* Component-alpha ADD: per pixel, masks src with mask and adds it into dest; imp and op are unused. */
  1.1897 +static void
  1.1898 +combine_add_ca (pixman_implementation_t *imp,
  1.1899 +                pixman_op_t              op,
  1.1900 +                uint64_t *                dest,
  1.1901 +                const uint64_t *          src,
  1.1902 +                const uint64_t *          mask,
  1.1903 +                int                      width)
  1.1904 +{
  1.1905 +    int i;
  1.1906 +
  1.1907 +    for (i = 0; i < width; ++i)
  1.1908 +    {
  1.1909 +	uint64_t s = *(src + i);
  1.1910 +	uint64_t m = *(mask + i);
  1.1911 +	uint64_t d = *(dest + i);
  1.1912 +
  1.1913 +	combine_mask_value_ca (&s, &m); /* only s is modified; the helper takes the mask as const */
  1.1914 +
  1.1915 +	UN16x4_ADD_UN16x4 (d, s); /* presumably a per-channel add of s into d (overflow handling is in the macro, not visible here) */
  1.1916 +
  1.1917 +	*(dest + i) = d;
  1.1918 +    }
  1.1919 +}
  1.1920 +/* Component-alpha SATURATE: per pixel and per channel, adds masked src to dest, scaling src down when the channel's mask value exceeds ~dest >> A_SHIFT; imp and op are unused. */
  1.1921 +static void
  1.1922 +combine_saturate_ca (pixman_implementation_t *imp,
  1.1923 +                     pixman_op_t              op,
  1.1924 +                     uint64_t *                dest,
  1.1925 +                     const uint64_t *          src,
  1.1926 +                     const uint64_t *          mask,
  1.1927 +                     int                      width)
  1.1928 +{
  1.1929 +    int i;
  1.1930 +
  1.1931 +    for (i = 0; i < width; ++i)
  1.1932 +    {
  1.1933 +	uint64_t s, d;
  1.1934 +	uint32_t sa, sr, sg, sb, da;
  1.1935 +	uint32_t t, u, v; /* scratch variables handed to the ADD and GENERIC macros */
  1.1936 +	uint64_t m, n, o, p; /* m starts as the mask, then m/n/o/p hold the four per-channel results */
  1.1937 +
  1.1938 +	d = *(dest + i);
  1.1939 +	s = *(src + i);
  1.1940 +	m = *(mask + i);
  1.1941 +
  1.1942 +	combine_mask_ca (&s, &m); /* rewrites both s and m in place (helper defined near the top of the file) */
  1.1943 +
  1.1944 +	sa = (m >> A_SHIFT); /* fields of the adjusted mask at A_SHIFT, R_SHIFT, G_SHIFT and bit 0 */
  1.1945 +	sr = (m >> R_SHIFT) & MASK;
  1.1946 +	sg = (m >> G_SHIFT) & MASK;
  1.1947 +	sb =  m             & MASK;
  1.1948 +	da = ~d >> A_SHIFT; /* complemented dest shifted down by A_SHIFT (presumably the room left in dest alpha) */
  1.1949 +
  1.1950 +	if (sb <= da) /* channel at bit 0 */
  1.1951 +	    m = ADD (s, d, 0, t);
  1.1952 +	else /* here sb > da >= 0, so the divisor sb is never zero */
  1.1953 +	    m = GENERIC (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v);
  1.1954 +
  1.1955 +	if (sg <= da) /* channel at G_SHIFT */
  1.1956 +	    n = ADD (s, d, G_SHIFT, t);
  1.1957 +	else /* sg > da, so sg is nonzero */
  1.1958 +	    n = GENERIC (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v);
  1.1959 +
  1.1960 +	if (sr <= da) /* channel at R_SHIFT */
  1.1961 +	    o = ADD (s, d, R_SHIFT, t);
  1.1962 +	else /* sr > da, so sr is nonzero */
  1.1963 +	    o = GENERIC (s, d, R_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v);
  1.1964 +
  1.1965 +	if (sa <= da) /* channel at A_SHIFT */
  1.1966 +	    p = ADD (s, d, A_SHIFT, t);
  1.1967 +	else /* sa > da, so sa is nonzero */
  1.1968 +	    p = GENERIC (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v);
  1.1969 +
  1.1970 +	*(dest + i) = m | n | o | p; /* NOTE(review): assumes ADD/GENERIC return each channel already shifted into place - confirm against the macro definitions */
  1.1971 +    }
  1.1972 +}
  1.1973 +/* Shared worker for the disjoint component-alpha operators: the COMBINE_A and COMBINE_B bits of `combine` choose how the per-channel factors Fa and Fb are built before GENERIC blends s and d. */
  1.1974 +static void
  1.1975 +combine_disjoint_general_ca (uint64_t *      dest,
  1.1976 +                             const uint64_t *src,
  1.1977 +                             const uint64_t *mask,
  1.1978 +                             int            width,
  1.1979 +                             uint16_t        combine)
  1.1980 +{
  1.1981 +    int i;
  1.1982 +
  1.1983 +    for (i = 0; i < width; ++i)
  1.1984 +    {
  1.1985 +	uint64_t s, d;
  1.1986 +	uint64_t m, n, o, p; /* m starts as the mask; afterwards m/n/o/p are per-channel temporaries */
  1.1987 +	uint64_t Fa, Fb; /* packed per-channel factors, passed to GENERIC alongside s and d */
  1.1988 +	uint32_t t, u, v; /* scratch variables handed to GENERIC */
  1.1989 +	uint64_t sa;
  1.1990 +	uint16_t da;
  1.1991 +
  1.1992 +	s = *(src + i);
  1.1993 +	m = *(mask + i);
  1.1994 +	d = *(dest + i);
  1.1995 +	da = d >> A_SHIFT; /* dest shifted down by A_SHIFT, truncated to 16 bits (presumably dest alpha) */
  1.1996 +
  1.1997 +	combine_mask_ca (&s, &m); /* rewrites both s and m in place (helper defined near the top of the file) */
  1.1998 +
  1.1999 +	sa = m; /* keep the adjusted mask; m is reused as a temporary below */
  1.2000 +
  1.2001 +	switch (combine & COMBINE_A) /* build Fa */
  1.2002 +	{
  1.2003 +	default: /* none of the cases below matched */
  1.2004 +	    Fa = 0;
  1.2005 +	    break;
  1.2006 +
  1.2007 +	case COMBINE_A_OUT: /* one combine_disjoint_out_part (channel of sa, da) result per channel */
  1.2008 +	    m = (uint64_t)combine_disjoint_out_part ((uint16_t) (sa >> 0), da);
  1.2009 +	    n = (uint64_t)combine_disjoint_out_part ((uint16_t) (sa >> G_SHIFT), da) << G_SHIFT;
  1.2010 +	    o = (uint64_t)combine_disjoint_out_part ((uint16_t) (sa >> R_SHIFT), da) << R_SHIFT;
  1.2011 +	    p = (uint64_t)combine_disjoint_out_part ((uint16_t) (sa >> A_SHIFT), da) << A_SHIFT;
  1.2012 +	    Fa = m | n | o | p;
  1.2013 +	    break;
  1.2014 +
  1.2015 +	case COMBINE_A_IN: /* one combine_disjoint_in_part (channel of sa, da) result per channel */
  1.2016 +	    m = (uint64_t)combine_disjoint_in_part ((uint16_t) (sa >> 0), da);
  1.2017 +	    n = (uint64_t)combine_disjoint_in_part ((uint16_t) (sa >> G_SHIFT), da) << G_SHIFT;
  1.2018 +	    o = (uint64_t)combine_disjoint_in_part ((uint16_t) (sa >> R_SHIFT), da) << R_SHIFT;
  1.2019 +	    p = (uint64_t)combine_disjoint_in_part ((uint16_t) (sa >> A_SHIFT), da) << A_SHIFT;
  1.2020 +	    Fa = m | n | o | p;
  1.2021 +	    break;
  1.2022 +
  1.2023 +	case COMBINE_A: /* all bits set in every channel */
  1.2024 +	    Fa = ~0;
  1.2025 +	    break;
  1.2026 +	}
  1.2027 +
  1.2028 +	switch (combine & COMBINE_B) /* build Fb; same shape as Fa but with the helper arguments swapped */
  1.2029 +	{
  1.2030 +	default: /* none of the cases below matched */
  1.2031 +	    Fb = 0;
  1.2032 +	    break;
  1.2033 +
  1.2034 +	case COMBINE_B_OUT: /* one combine_disjoint_out_part (da, channel of sa) result per channel */
  1.2035 +	    m = (uint64_t)combine_disjoint_out_part (da, (uint16_t) (sa >> 0));
  1.2036 +	    n = (uint64_t)combine_disjoint_out_part (da, (uint16_t) (sa >> G_SHIFT)) << G_SHIFT;
  1.2037 +	    o = (uint64_t)combine_disjoint_out_part (da, (uint16_t) (sa >> R_SHIFT)) << R_SHIFT;
  1.2038 +	    p = (uint64_t)combine_disjoint_out_part (da, (uint16_t) (sa >> A_SHIFT)) << A_SHIFT;
  1.2039 +	    Fb = m | n | o | p;
  1.2040 +	    break;
  1.2041 +
  1.2042 +	case COMBINE_B_IN: /* one combine_disjoint_in_part (da, channel of sa) result per channel */
  1.2043 +	    m = (uint64_t)combine_disjoint_in_part (da, (uint16_t) (sa >> 0));
  1.2044 +	    n = (uint64_t)combine_disjoint_in_part (da, (uint16_t) (sa >> G_SHIFT)) << G_SHIFT;
  1.2045 +	    o = (uint64_t)combine_disjoint_in_part (da, (uint16_t) (sa >> R_SHIFT)) << R_SHIFT;
  1.2046 +	    p = (uint64_t)combine_disjoint_in_part (da, (uint16_t) (sa >> A_SHIFT)) << A_SHIFT;
  1.2047 +	    Fb = m | n | o | p;
  1.2048 +	    break;
  1.2049 +
  1.2050 +	case COMBINE_B: /* all bits set in every channel */
  1.2051 +	    Fb = ~0;
  1.2052 +	    break;
  1.2053 +	}
  1.2054 +	m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v); /* presumably s * Fa + d * Fb for the channel at the given shift; macro defined outside this view */
  1.2055 +	n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
  1.2056 +	o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
  1.2057 +	p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
  1.2058 +
  1.2059 +	s = m | n | o | p; /* merge the four channel results into one pixel */
  1.2060 +
  1.2061 +	*(dest + i) = s;
  1.2062 +    }
  1.2063 +}
  1.2064 +/* Disjoint OVER, component alpha: thin wrapper that runs combine_disjoint_general_ca with COMBINE_A_OVER; imp and op are ignored. */
  1.2065 +static void
  1.2066 +combine_disjoint_over_ca (pixman_implementation_t *imp,
  1.2067 +                          pixman_op_t              op,
  1.2068 +                          uint64_t *                dest,
  1.2069 +                          const uint64_t *          src,
  1.2070 +                          const uint64_t *          mask,
  1.2071 +                          int                      width)
  1.2072 +{
  1.2073 +    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
  1.2074 +}
  1.2075 +/* Disjoint IN, component alpha: thin wrapper that runs combine_disjoint_general_ca with COMBINE_A_IN; imp and op are ignored. */
  1.2076 +static void
  1.2077 +combine_disjoint_in_ca (pixman_implementation_t *imp,
  1.2078 +                        pixman_op_t              op,
  1.2079 +                        uint64_t *                dest,
  1.2080 +                        const uint64_t *          src,
  1.2081 +                        const uint64_t *          mask,
  1.2082 +                        int                      width)
  1.2083 +{
  1.2084 +    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
  1.2085 +}
  1.2086 +/* Disjoint IN_REVERSE, component alpha: thin wrapper that runs combine_disjoint_general_ca with COMBINE_B_IN; imp and op are ignored. */
  1.2087 +static void
  1.2088 +combine_disjoint_in_reverse_ca (pixman_implementation_t *imp,
  1.2089 +                                pixman_op_t              op,
  1.2090 +                                uint64_t *                dest,
  1.2091 +                                const uint64_t *          src,
  1.2092 +                                const uint64_t *          mask,
  1.2093 +                                int                      width)
  1.2094 +{
  1.2095 +    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
  1.2096 +}
  1.2097 +/* Disjoint OUT, component alpha: thin wrapper that runs combine_disjoint_general_ca with COMBINE_A_OUT; imp and op are ignored. */
  1.2098 +static void
  1.2099 +combine_disjoint_out_ca (pixman_implementation_t *imp,
  1.2100 +                         pixman_op_t              op,
  1.2101 +                         uint64_t *                dest,
  1.2102 +                         const uint64_t *          src,
  1.2103 +                         const uint64_t *          mask,
  1.2104 +                         int                      width)
  1.2105 +{
  1.2106 +    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
  1.2107 +}
  1.2108 +/* Disjoint OUT_REVERSE, component alpha: thin wrapper that runs combine_disjoint_general_ca with COMBINE_B_OUT; imp and op are ignored. */
  1.2109 +static void
  1.2110 +combine_disjoint_out_reverse_ca (pixman_implementation_t *imp,
  1.2111 +                                 pixman_op_t              op,
  1.2112 +                                 uint64_t *                dest,
  1.2113 +                                 const uint64_t *          src,
  1.2114 +                                 const uint64_t *          mask,
  1.2115 +                                 int                      width)
  1.2116 +{
  1.2117 +    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
  1.2118 +}
  1.2119 +/* Disjoint ATOP, component alpha: thin wrapper that runs combine_disjoint_general_ca with COMBINE_A_ATOP; imp and op are ignored. */
  1.2120 +static void
  1.2121 +combine_disjoint_atop_ca (pixman_implementation_t *imp,
  1.2122 +                          pixman_op_t              op,
  1.2123 +                          uint64_t *                dest,
  1.2124 +                          const uint64_t *          src,
  1.2125 +                          const uint64_t *          mask,
  1.2126 +                          int                      width)
  1.2127 +{
  1.2128 +    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
  1.2129 +}
  1.2130 +/* Disjoint ATOP_REVERSE, component alpha: thin wrapper that runs combine_disjoint_general_ca with COMBINE_B_ATOP; imp and op are ignored. */
  1.2131 +static void
  1.2132 +combine_disjoint_atop_reverse_ca (pixman_implementation_t *imp,
  1.2133 +                                  pixman_op_t              op,
  1.2134 +                                  uint64_t *                dest,
  1.2135 +                                  const uint64_t *          src,
  1.2136 +                                  const uint64_t *          mask,
  1.2137 +                                  int                      width)
  1.2138 +{
  1.2139 +    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
  1.2140 +}
  1.2141 +/* Disjoint XOR, component alpha: thin wrapper that runs combine_disjoint_general_ca with COMBINE_XOR; imp and op are ignored. */
  1.2142 +static void
  1.2143 +combine_disjoint_xor_ca (pixman_implementation_t *imp,
  1.2144 +                         pixman_op_t              op,
  1.2145 +                         uint64_t *                dest,
  1.2146 +                         const uint64_t *          src,
  1.2147 +                         const uint64_t *          mask,
  1.2148 +                         int                      width)
  1.2149 +{
  1.2150 +    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
  1.2151 +}
  1.2152 +/* Shared worker for the conjoint component-alpha operators: the COMBINE_A and COMBINE_B bits of `combine` choose how the per-channel factors Fa and Fb are built before GENERIC blends s and d. */
  1.2153 +static void
  1.2154 +combine_conjoint_general_ca (uint64_t *      dest,
  1.2155 +                             const uint64_t *src,
  1.2156 +                             const uint64_t *mask,
  1.2157 +                             int            width,
  1.2158 +                             uint16_t        combine)
  1.2159 +{
  1.2160 +    int i;
  1.2161 +
  1.2162 +    for (i = 0; i < width; ++i)
  1.2163 +    {
  1.2164 +	uint64_t s, d;
  1.2165 +	uint64_t m, n, o, p; /* m starts as the mask; afterwards m/n/o/p are per-channel temporaries */
  1.2166 +	uint64_t Fa, Fb; /* packed per-channel factors, passed to GENERIC alongside s and d */
  1.2167 +	uint32_t t, u, v; /* scratch variables handed to GENERIC */
  1.2168 +	uint64_t sa;
  1.2169 +	uint16_t da;
  1.2170 +
  1.2171 +	s = *(src + i);
  1.2172 +	m = *(mask + i);
  1.2173 +	d = *(dest + i);
  1.2174 +	da = d >> A_SHIFT; /* dest shifted down by A_SHIFT, truncated to 16 bits (presumably dest alpha) */
  1.2175 +
  1.2176 +	combine_mask_ca (&s, &m); /* rewrites both s and m in place (helper defined near the top of the file) */
  1.2177 +
  1.2178 +	sa = m; /* keep the adjusted mask; m is reused as a temporary below */
  1.2179 +
  1.2180 +	switch (combine & COMBINE_A) /* build Fa */
  1.2181 +	{
  1.2182 +	default: /* none of the cases below matched */
  1.2183 +	    Fa = 0;
  1.2184 +	    break;
  1.2185 +
  1.2186 +	case COMBINE_A_OUT: /* one combine_conjoint_out_part (channel of sa, da) result per channel */
  1.2187 +	    m = (uint64_t)combine_conjoint_out_part ((uint16_t) (sa >> 0), da);
  1.2188 +	    n = (uint64_t)combine_conjoint_out_part ((uint16_t) (sa >> G_SHIFT), da) << G_SHIFT;
  1.2189 +	    o = (uint64_t)combine_conjoint_out_part ((uint16_t) (sa >> R_SHIFT), da) << R_SHIFT;
  1.2190 +	    p = (uint64_t)combine_conjoint_out_part ((uint16_t) (sa >> A_SHIFT), da) << A_SHIFT;
  1.2191 +	    Fa = m | n | o | p;
  1.2192 +	    break;
  1.2193 +
  1.2194 +	case COMBINE_A_IN: /* one combine_conjoint_in_part (channel of sa, da) result per channel */
  1.2195 +	    m = (uint64_t)combine_conjoint_in_part ((uint16_t) (sa >> 0), da);
  1.2196 +	    n = (uint64_t)combine_conjoint_in_part ((uint16_t) (sa >> G_SHIFT), da) << G_SHIFT;
  1.2197 +	    o = (uint64_t)combine_conjoint_in_part ((uint16_t) (sa >> R_SHIFT), da) << R_SHIFT;
  1.2198 +	    p = (uint64_t)combine_conjoint_in_part ((uint16_t) (sa >> A_SHIFT), da) << A_SHIFT;
  1.2199 +	    Fa = m | n | o | p;
  1.2200 +	    break;
  1.2201 +
  1.2202 +	case COMBINE_A: /* all bits set in every channel */
  1.2203 +	    Fa = ~0;
  1.2204 +	    break;
  1.2205 +	}
  1.2206 +
  1.2207 +	switch (combine & COMBINE_B) /* build Fb; same shape as Fa but with the helper arguments swapped */
  1.2208 +	{
  1.2209 +	default: /* none of the cases below matched */
  1.2210 +	    Fb = 0;
  1.2211 +	    break;
  1.2212 +
  1.2213 +	case COMBINE_B_OUT: /* one combine_conjoint_out_part (da, channel of sa) result per channel */
  1.2214 +	    m = (uint64_t)combine_conjoint_out_part (da, (uint16_t) (sa >> 0));
  1.2215 +	    n = (uint64_t)combine_conjoint_out_part (da, (uint16_t) (sa >> G_SHIFT)) << G_SHIFT;
  1.2216 +	    o = (uint64_t)combine_conjoint_out_part (da, (uint16_t) (sa >> R_SHIFT)) << R_SHIFT;
  1.2217 +	    p = (uint64_t)combine_conjoint_out_part (da, (uint16_t) (sa >> A_SHIFT)) << A_SHIFT;
  1.2218 +	    Fb = m | n | o | p;
  1.2219 +	    break;
  1.2220 +
  1.2221 +	case COMBINE_B_IN: /* one combine_conjoint_in_part (da, channel of sa) result per channel */
  1.2222 +	    m = (uint64_t)combine_conjoint_in_part (da, (uint16_t) (sa >> 0));
  1.2223 +	    n = (uint64_t)combine_conjoint_in_part (da, (uint16_t) (sa >> G_SHIFT)) << G_SHIFT;
  1.2224 +	    o = (uint64_t)combine_conjoint_in_part (da, (uint16_t) (sa >> R_SHIFT)) << R_SHIFT;
  1.2225 +	    p = (uint64_t)combine_conjoint_in_part (da, (uint16_t) (sa >> A_SHIFT)) << A_SHIFT;
  1.2226 +	    Fb = m | n | o | p;
  1.2227 +	    break;
  1.2228 +
  1.2229 +	case COMBINE_B: /* all bits set in every channel */
  1.2230 +	    Fb = ~0;
  1.2231 +	    break;
  1.2232 +	}
  1.2233 +	m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v); /* presumably s * Fa + d * Fb for the channel at the given shift; macro defined outside this view */
  1.2234 +	n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
  1.2235 +	o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
  1.2236 +	p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
  1.2237 +
  1.2238 +	s = m | n | o | p; /* merge the four channel results into one pixel */
  1.2239 +
  1.2240 +	*(dest + i) = s;
  1.2241 +    }
  1.2242 +}
  1.2243 +/* Conjoint OVER, component alpha: thin wrapper that runs combine_conjoint_general_ca with COMBINE_A_OVER; imp and op are ignored. */
  1.2244 +static void
  1.2245 +combine_conjoint_over_ca (pixman_implementation_t *imp,
  1.2246 +                          pixman_op_t              op,
  1.2247 +                          uint64_t *                dest,
  1.2248 +                          const uint64_t *          src,
  1.2249 +                          const uint64_t *          mask,
  1.2250 +                          int                      width)
  1.2251 +{
  1.2252 +    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
  1.2253 +}
  1.2254 +/* Conjoint OVER_REVERSE, component alpha: thin wrapper that runs combine_conjoint_general_ca with COMBINE_B_OVER; imp and op are ignored. */
  1.2255 +static void
  1.2256 +combine_conjoint_over_reverse_ca (pixman_implementation_t *imp,
  1.2257 +                                  pixman_op_t              op,
  1.2258 +                                  uint64_t *                dest,
  1.2259 +                                  const uint64_t *          src,
  1.2260 +                                  const uint64_t *          mask,
  1.2261 +                                  int                      width)
  1.2262 +{
  1.2263 +    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OVER);
  1.2264 +}
  1.2265 +/* Conjoint IN, component alpha: thin wrapper that runs combine_conjoint_general_ca with COMBINE_A_IN; imp and op are ignored. */
  1.2266 +static void
  1.2267 +combine_conjoint_in_ca (pixman_implementation_t *imp,
  1.2268 +                        pixman_op_t              op,
  1.2269 +                        uint64_t *                dest,
  1.2270 +                        const uint64_t *          src,
  1.2271 +                        const uint64_t *          mask,
  1.2272 +                        int                      width)
  1.2273 +{
  1.2274 +    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
  1.2275 +}
  1.2276 +/* Conjoint IN_REVERSE, component alpha: thin wrapper that runs combine_conjoint_general_ca with COMBINE_B_IN; imp and op are ignored. */
  1.2277 +static void
  1.2278 +combine_conjoint_in_reverse_ca (pixman_implementation_t *imp,
  1.2279 +                                pixman_op_t              op,
  1.2280 +                                uint64_t *                dest,
  1.2281 +                                const uint64_t *          src,
  1.2282 +                                const uint64_t *          mask,
  1.2283 +                                int                      width)
  1.2284 +{
  1.2285 +    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
  1.2286 +}
  1.2287 +/* Conjoint OUT, component alpha: thin wrapper that runs combine_conjoint_general_ca with COMBINE_A_OUT; imp and op are ignored. */
  1.2288 +static void
  1.2289 +combine_conjoint_out_ca (pixman_implementation_t *imp,
  1.2290 +                         pixman_op_t              op,
  1.2291 +                         uint64_t *                dest,
  1.2292 +                         const uint64_t *          src,
  1.2293 +                         const uint64_t *          mask,
  1.2294 +                         int                      width)
  1.2295 +{
  1.2296 +    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
  1.2297 +}
  1.2298 +/* Conjoint OUT_REVERSE, component alpha: thin wrapper that runs combine_conjoint_general_ca with COMBINE_B_OUT; imp and op are ignored. */
  1.2299 +static void
  1.2300 +combine_conjoint_out_reverse_ca (pixman_implementation_t *imp,
  1.2301 +                                 pixman_op_t              op,
  1.2302 +                                 uint64_t *                dest,
  1.2303 +                                 const uint64_t *          src,
  1.2304 +                                 const uint64_t *          mask,
  1.2305 +                                 int                      width)
  1.2306 +{
  1.2307 +    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
  1.2308 +}
  1.2309 +/* Conjoint ATOP, component alpha: thin wrapper that runs combine_conjoint_general_ca with COMBINE_A_ATOP; imp and op are ignored. */
  1.2310 +static void
  1.2311 +combine_conjoint_atop_ca (pixman_implementation_t *imp,
  1.2312 +                          pixman_op_t              op,
  1.2313 +                          uint64_t *                dest,
  1.2314 +                          const uint64_t *          src,
  1.2315 +                          const uint64_t *          mask,
  1.2316 +                          int                      width)
  1.2317 +{
  1.2318 +    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
  1.2319 +}
  1.2320 +/* Conjoint ATOP_REVERSE, component alpha: thin wrapper that runs combine_conjoint_general_ca with COMBINE_B_ATOP; imp and op are ignored. */
  1.2321 +static void
  1.2322 +combine_conjoint_atop_reverse_ca (pixman_implementation_t *imp,
  1.2323 +                                  pixman_op_t              op,
  1.2324 +                                  uint64_t *                dest,
  1.2325 +                                  const uint64_t *          src,
  1.2326 +                                  const uint64_t *          mask,
  1.2327 +                                  int                      width)
  1.2328 +{
  1.2329 +    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
  1.2330 +}
  1.2331 +/* Conjoint XOR, component alpha: thin wrapper that runs combine_conjoint_general_ca with COMBINE_XOR; imp and op are ignored. */
  1.2332 +static void
  1.2333 +combine_conjoint_xor_ca (pixman_implementation_t *imp,
  1.2334 +                         pixman_op_t              op,
  1.2335 +                         uint64_t *                dest,
  1.2336 +                         const uint64_t *          src,
  1.2337 +                         const uint64_t *          mask,
  1.2338 +                         int                      width)
  1.2339 +{
  1.2340 +    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
  1.2341 +}
  1.2342 +/* Fills imp's combine_64 (unified alpha) and combine_64_ca (component alpha) tables, indexed by PIXMAN_OP_* value, with this file's 64-bit combiner routines. */
  1.2343 +void
  1.2344 +_pixman_setup_combiner_functions_64 (pixman_implementation_t *imp)
  1.2345 +{
  1.2346 +    /* Unified alpha */
  1.2347 +    imp->combine_64[PIXMAN_OP_CLEAR] = combine_clear;
  1.2348 +    imp->combine_64[PIXMAN_OP_SRC] = combine_src_u;
  1.2349 +    imp->combine_64[PIXMAN_OP_DST] = combine_dst;
  1.2350 +    imp->combine_64[PIXMAN_OP_OVER] = combine_over_u;
  1.2351 +    imp->combine_64[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u;
  1.2352 +    imp->combine_64[PIXMAN_OP_IN] = combine_in_u;
  1.2353 +    imp->combine_64[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u;
  1.2354 +    imp->combine_64[PIXMAN_OP_OUT] = combine_out_u;
  1.2355 +    imp->combine_64[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u;
  1.2356 +    imp->combine_64[PIXMAN_OP_ATOP] = combine_atop_u;
  1.2357 +    imp->combine_64[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u;
  1.2358 +    imp->combine_64[PIXMAN_OP_XOR] = combine_xor_u;
  1.2359 +    imp->combine_64[PIXMAN_OP_ADD] = combine_add_u;
  1.2360 +    imp->combine_64[PIXMAN_OP_SATURATE] = combine_saturate_u;
  1.2361 +
  1.2362 +    /* Disjoint, unified */
  1.2363 +    imp->combine_64[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear; /* CLEAR, SRC and DST reuse the plain routines */
  1.2364 +    imp->combine_64[PIXMAN_OP_DISJOINT_SRC] = combine_src_u;
  1.2365 +    imp->combine_64[PIXMAN_OP_DISJOINT_DST] = combine_dst;
  1.2366 +    imp->combine_64[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u;
  1.2367 +    imp->combine_64[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_u; /* the SATURATE routine also serves DISJOINT_OVER_REVERSE */
  1.2368 +    imp->combine_64[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u;
  1.2369 +    imp->combine_64[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u;
  1.2370 +    imp->combine_64[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u;
  1.2371 +    imp->combine_64[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u;
  1.2372 +    imp->combine_64[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u;
  1.2373 +    imp->combine_64[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u;
  1.2374 +    imp->combine_64[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u;
  1.2375 +
  1.2376 +    /* Conjoint, unified */
  1.2377 +    imp->combine_64[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear; /* CLEAR, SRC and DST reuse the plain routines */
  1.2378 +    imp->combine_64[PIXMAN_OP_CONJOINT_SRC] = combine_src_u;
  1.2379 +    imp->combine_64[PIXMAN_OP_CONJOINT_DST] = combine_dst;
  1.2380 +    imp->combine_64[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u;
  1.2381 +    imp->combine_64[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u;
  1.2382 +    imp->combine_64[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u;
  1.2383 +    imp->combine_64[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u;
  1.2384 +    imp->combine_64[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u;
  1.2385 +    imp->combine_64[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u;
  1.2386 +    imp->combine_64[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u;
  1.2387 +    imp->combine_64[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u;
  1.2388 +    imp->combine_64[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u;
  1.2389 +    /* MULTIPLY through HSL_LUMINOSITY, unified */
  1.2390 +    imp->combine_64[PIXMAN_OP_MULTIPLY] = combine_multiply_u;
  1.2391 +    imp->combine_64[PIXMAN_OP_SCREEN] = combine_screen_u;
  1.2392 +    imp->combine_64[PIXMAN_OP_OVERLAY] = combine_overlay_u;
  1.2393 +    imp->combine_64[PIXMAN_OP_DARKEN] = combine_darken_u;
  1.2394 +    imp->combine_64[PIXMAN_OP_LIGHTEN] = combine_lighten_u;
  1.2395 +    imp->combine_64[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u;
  1.2396 +    imp->combine_64[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u;
  1.2397 +    imp->combine_64[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u;
  1.2398 +    imp->combine_64[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u;
  1.2399 +    imp->combine_64[PIXMAN_OP_DIFFERENCE] = combine_difference_u;
  1.2400 +    imp->combine_64[PIXMAN_OP_EXCLUSION] = combine_exclusion_u;
  1.2401 +    imp->combine_64[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u;
  1.2402 +    imp->combine_64[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u;
  1.2403 +    imp->combine_64[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u;
  1.2404 +    imp->combine_64[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u;
  1.2405 +
  1.2406 +    /* Component alpha combiners */
  1.2407 +    imp->combine_64_ca[PIXMAN_OP_CLEAR] = combine_clear_ca;
  1.2408 +    imp->combine_64_ca[PIXMAN_OP_SRC] = combine_src_ca;
  1.2409 +    /* dest: no combine_64_ca[PIXMAN_OP_DST] entry is assigned in this function. NOTE(review): confirm that slot is filled or never called elsewhere. */
  1.2410 +    imp->combine_64_ca[PIXMAN_OP_OVER] = combine_over_ca;
  1.2411 +    imp->combine_64_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca;
  1.2412 +    imp->combine_64_ca[PIXMAN_OP_IN] = combine_in_ca;
  1.2413 +    imp->combine_64_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca;
  1.2414 +    imp->combine_64_ca[PIXMAN_OP_OUT] = combine_out_ca;
  1.2415 +    imp->combine_64_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca;
  1.2416 +    imp->combine_64_ca[PIXMAN_OP_ATOP] = combine_atop_ca;
  1.2417 +    imp->combine_64_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca;
  1.2418 +    imp->combine_64_ca[PIXMAN_OP_XOR] = combine_xor_ca;
  1.2419 +    imp->combine_64_ca[PIXMAN_OP_ADD] = combine_add_ca;
  1.2420 +    imp->combine_64_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca;
  1.2421 +
  1.2422 +    /* Disjoint CA */
  1.2423 +    imp->combine_64_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear_ca;
  1.2424 +    imp->combine_64_ca[PIXMAN_OP_DISJOINT_SRC] = combine_src_ca;
  1.2425 +    imp->combine_64_ca[PIXMAN_OP_DISJOINT_DST] = combine_dst; /* same routine as the unified DST entries */
  1.2426 +    imp->combine_64_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca;
  1.2427 +    imp->combine_64_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_ca; /* the SATURATE routine also serves DISJOINT_OVER_REVERSE */
  1.2428 +    imp->combine_64_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca;
  1.2429 +    imp->combine_64_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca;
  1.2430 +    imp->combine_64_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca;
  1.2431 +    imp->combine_64_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca;
  1.2432 +    imp->combine_64_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca;
  1.2433 +    imp->combine_64_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca;
  1.2434 +    imp->combine_64_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca;
  1.2435 +
  1.2436 +    /* Conjoint CA */
  1.2437 +    imp->combine_64_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear_ca;
  1.2438 +    imp->combine_64_ca[PIXMAN_OP_CONJOINT_SRC] = combine_src_ca;
  1.2439 +    imp->combine_64_ca[PIXMAN_OP_CONJOINT_DST] = combine_dst; /* same routine as the unified DST entries */
  1.2440 +    imp->combine_64_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca;
  1.2441 +    imp->combine_64_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca;
  1.2442 +    imp->combine_64_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca;
  1.2443 +    imp->combine_64_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca;
  1.2444 +    imp->combine_64_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca;
  1.2445 +    imp->combine_64_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca;
  1.2446 +    imp->combine_64_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca;
  1.2447 +    imp->combine_64_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca;
  1.2448 +    imp->combine_64_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca;
  1.2449 +    /* MULTIPLY through EXCLUSION, component alpha */
  1.2450 +    imp->combine_64_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca;
  1.2451 +    imp->combine_64_ca[PIXMAN_OP_SCREEN] = combine_screen_ca;
  1.2452 +    imp->combine_64_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca;
  1.2453 +    imp->combine_64_ca[PIXMAN_OP_DARKEN] = combine_darken_ca;
  1.2454 +    imp->combine_64_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca;
  1.2455 +    imp->combine_64_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca;
  1.2456 +    imp->combine_64_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca;
  1.2457 +    imp->combine_64_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca;
  1.2458 +    imp->combine_64_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca;
  1.2459 +    imp->combine_64_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca;
  1.2460 +    imp->combine_64_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca;
  1.2461 +
  1.2462 +    /* It is not clear that these make sense, so make them no-ops for now */
  1.2463 +    imp->combine_64_ca[PIXMAN_OP_HSL_HUE] = combine_dst;
  1.2464 +    imp->combine_64_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst;
  1.2465 +    imp->combine_64_ca[PIXMAN_OP_HSL_COLOR] = combine_dst;
  1.2466 +    imp->combine_64_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst;
  1.2467 +}
  1.2468 +

mercurial