gfx/cairo/libpixman/src/pixman-fast-path.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/cairo/libpixman/src/pixman-fast-path.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,2590 @@
     1.4 +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
     1.5 +/*
     1.6 + * Copyright © 2000 SuSE, Inc.
     1.7 + * Copyright © 2007 Red Hat, Inc.
     1.8 + *
     1.9 + * Permission to use, copy, modify, distribute, and sell this software and its
    1.10 + * documentation for any purpose is hereby granted without fee, provided that
    1.11 + * the above copyright notice appear in all copies and that both that
    1.12 + * copyright notice and this permission notice appear in supporting
    1.13 + * documentation, and that the name of SuSE not be used in advertising or
    1.14 + * publicity pertaining to distribution of the software without specific,
    1.15 + * written prior permission.  SuSE makes no representations about the
    1.16 + * suitability of this software for any purpose.  It is provided "as is"
    1.17 + * without express or implied warranty.
    1.18 + *
    1.19 + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
    1.20 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
    1.21 + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    1.22 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
    1.23 + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
    1.24 + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
    1.25 + *
    1.26 + * Author:  Keith Packard, SuSE, Inc.
    1.27 + */
    1.28 +
    1.29 +#ifdef HAVE_CONFIG_H
    1.30 +#include <config.h>
    1.31 +#endif
    1.32 +#include <string.h>
    1.33 +#include <stdlib.h>
    1.34 +#include "pixman-private.h"
    1.35 +#include "pixman-combine32.h"
    1.36 +#include "pixman-inlines.h"
    1.37 +
    1.38 +static force_inline uint32_t
    1.39 +fetch_24 (uint8_t *a)
    1.40 +{
    1.41 +    if (((uintptr_t)a) & 1)
    1.42 +    {
    1.43 +#ifdef WORDS_BIGENDIAN
    1.44 +	return (*a << 16) | (*(uint16_t *)(a + 1));
    1.45 +#else
    1.46 +	return *a | (*(uint16_t *)(a + 1) << 8);
    1.47 +#endif
    1.48 +    }
    1.49 +    else
    1.50 +    {
    1.51 +#ifdef WORDS_BIGENDIAN
    1.52 +	return (*(uint16_t *)a << 8) | *(a + 2);
    1.53 +#else
    1.54 +	return *(uint16_t *)a | (*(a + 2) << 16);
    1.55 +#endif
    1.56 +    }
    1.57 +}
    1.58 +
    1.59 +static force_inline void
    1.60 +store_24 (uint8_t *a,
    1.61 +          uint32_t v)
    1.62 +{
    1.63 +    if (((uintptr_t)a) & 1)
    1.64 +    {
    1.65 +#ifdef WORDS_BIGENDIAN
    1.66 +	*a = (uint8_t) (v >> 16);
    1.67 +	*(uint16_t *)(a + 1) = (uint16_t) (v);
    1.68 +#else
    1.69 +	*a = (uint8_t) (v);
    1.70 +	*(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
    1.71 +#endif
    1.72 +    }
    1.73 +    else
    1.74 +    {
    1.75 +#ifdef WORDS_BIGENDIAN
    1.76 +	*(uint16_t *)a = (uint16_t)(v >> 8);
    1.77 +	*(a + 2) = (uint8_t)v;
    1.78 +#else
    1.79 +	*(uint16_t *)a = (uint16_t)v;
    1.80 +	*(a + 2) = (uint8_t)(v >> 16);
    1.81 +#endif
    1.82 +    }
    1.83 +}
    1.84 +
    1.85 +static force_inline uint32_t
    1.86 +over (uint32_t src,
    1.87 +      uint32_t dest)
    1.88 +{
    1.89 +    uint32_t a = ~src >> 24;
    1.90 +
    1.91 +    UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);
    1.92 +
    1.93 +    return dest;
    1.94 +}
    1.95 +
    1.96 +static force_inline uint32_t
    1.97 +in (uint32_t x,
    1.98 +    uint8_t  y)
    1.99 +{
   1.100 +    uint16_t a = y;
   1.101 +
   1.102 +    UN8x4_MUL_UN8 (x, a);
   1.103 +
   1.104 +    return x;
   1.105 +}
   1.106 +
   1.107 +/*
   1.108 + * Naming convention:
   1.109 + *
   1.110 + *  op_src_mask_dest
   1.111 + */
   1.112 +static void
   1.113 +fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
   1.114 +                                 pixman_composite_info_t *info)
   1.115 +{
   1.116 +    PIXMAN_COMPOSITE_ARGS (info);
   1.117 +    uint32_t    *src, *src_line;
   1.118 +    uint32_t    *dst, *dst_line;
   1.119 +    uint8_t     *mask, *mask_line;
   1.120 +    int src_stride, mask_stride, dst_stride;
   1.121 +    uint8_t m;
   1.122 +    uint32_t s, d;
   1.123 +    int32_t w;
   1.124 +
   1.125 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
   1.126 +    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
   1.127 +    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
   1.128 +
   1.129 +    while (height--)
   1.130 +    {
   1.131 +	src = src_line;
   1.132 +	src_line += src_stride;
   1.133 +	dst = dst_line;
   1.134 +	dst_line += dst_stride;
   1.135 +	mask = mask_line;
   1.136 +	mask_line += mask_stride;
   1.137 +
   1.138 +	w = width;
   1.139 +	while (w--)
   1.140 +	{
   1.141 +	    m = *mask++;
   1.142 +	    if (m)
   1.143 +	    {
   1.144 +		s = *src | 0xff000000;
   1.145 +
   1.146 +		if (m == 0xff)
   1.147 +		{
   1.148 +		    *dst = s;
   1.149 +		}
   1.150 +		else
   1.151 +		{
   1.152 +		    d = in (s, m);
   1.153 +		    *dst = over (d, *dst);
   1.154 +		}
   1.155 +	    }
   1.156 +	    src++;
   1.157 +	    dst++;
   1.158 +	}
   1.159 +    }
   1.160 +}
   1.161 +
   1.162 +static void
   1.163 +fast_composite_in_n_8_8 (pixman_implementation_t *imp,
   1.164 +                         pixman_composite_info_t *info)
   1.165 +{
   1.166 +    PIXMAN_COMPOSITE_ARGS (info);
   1.167 +    uint32_t src, srca;
   1.168 +    uint8_t     *dst_line, *dst;
   1.169 +    uint8_t     *mask_line, *mask, m;
   1.170 +    int dst_stride, mask_stride;
   1.171 +    int32_t w;
   1.172 +    uint16_t t;
   1.173 +
   1.174 +    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
   1.175 +
   1.176 +    srca = src >> 24;
   1.177 +
   1.178 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
   1.179 +    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
   1.180 +
   1.181 +    if (srca == 0xff)
   1.182 +    {
   1.183 +	while (height--)
   1.184 +	{
   1.185 +	    dst = dst_line;
   1.186 +	    dst_line += dst_stride;
   1.187 +	    mask = mask_line;
   1.188 +	    mask_line += mask_stride;
   1.189 +	    w = width;
   1.190 +
   1.191 +	    while (w--)
   1.192 +	    {
   1.193 +		m = *mask++;
   1.194 +
   1.195 +		if (m == 0)
   1.196 +		    *dst = 0;
   1.197 +		else if (m != 0xff)
   1.198 +		    *dst = MUL_UN8 (m, *dst, t);
   1.199 +
   1.200 +		dst++;
   1.201 +	    }
   1.202 +	}
   1.203 +    }
   1.204 +    else
   1.205 +    {
   1.206 +	while (height--)
   1.207 +	{
   1.208 +	    dst = dst_line;
   1.209 +	    dst_line += dst_stride;
   1.210 +	    mask = mask_line;
   1.211 +	    mask_line += mask_stride;
   1.212 +	    w = width;
   1.213 +
   1.214 +	    while (w--)
   1.215 +	    {
   1.216 +		m = *mask++;
   1.217 +		m = MUL_UN8 (m, srca, t);
   1.218 +
   1.219 +		if (m == 0)
   1.220 +		    *dst = 0;
   1.221 +		else if (m != 0xff)
   1.222 +		    *dst = MUL_UN8 (m, *dst, t);
   1.223 +
   1.224 +		dst++;
   1.225 +	    }
   1.226 +	}
   1.227 +    }
   1.228 +}
   1.229 +
   1.230 +static void
   1.231 +fast_composite_in_8_8 (pixman_implementation_t *imp,
   1.232 +                       pixman_composite_info_t *info)
   1.233 +{
   1.234 +    PIXMAN_COMPOSITE_ARGS (info);
   1.235 +    uint8_t     *dst_line, *dst;
   1.236 +    uint8_t     *src_line, *src;
   1.237 +    int dst_stride, src_stride;
   1.238 +    int32_t w;
   1.239 +    uint8_t s;
   1.240 +    uint16_t t;
   1.241 +
   1.242 +    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
   1.243 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
   1.244 +
   1.245 +    while (height--)
   1.246 +    {
   1.247 +	dst = dst_line;
   1.248 +	dst_line += dst_stride;
   1.249 +	src = src_line;
   1.250 +	src_line += src_stride;
   1.251 +	w = width;
   1.252 +
   1.253 +	while (w--)
   1.254 +	{
   1.255 +	    s = *src++;
   1.256 +
   1.257 +	    if (s == 0)
   1.258 +		*dst = 0;
   1.259 +	    else if (s != 0xff)
   1.260 +		*dst = MUL_UN8 (s, *dst, t);
   1.261 +
   1.262 +	    dst++;
   1.263 +	}
   1.264 +    }
   1.265 +}
   1.266 +
   1.267 +static void
   1.268 +fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
   1.269 +                              pixman_composite_info_t *info)
   1.270 +{
   1.271 +    PIXMAN_COMPOSITE_ARGS (info);
   1.272 +    uint32_t src, srca;
   1.273 +    uint32_t    *dst_line, *dst, d;
   1.274 +    uint8_t     *mask_line, *mask, m;
   1.275 +    int dst_stride, mask_stride;
   1.276 +    int32_t w;
   1.277 +
   1.278 +    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
   1.279 +
   1.280 +    srca = src >> 24;
   1.281 +    if (src == 0)
   1.282 +	return;
   1.283 +
   1.284 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
   1.285 +    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
   1.286 +
   1.287 +    while (height--)
   1.288 +    {
   1.289 +	dst = dst_line;
   1.290 +	dst_line += dst_stride;
   1.291 +	mask = mask_line;
   1.292 +	mask_line += mask_stride;
   1.293 +	w = width;
   1.294 +
   1.295 +	while (w--)
   1.296 +	{
   1.297 +	    m = *mask++;
   1.298 +	    if (m == 0xff)
   1.299 +	    {
   1.300 +		if (srca == 0xff)
   1.301 +		    *dst = src;
   1.302 +		else
   1.303 +		    *dst = over (src, *dst);
   1.304 +	    }
   1.305 +	    else if (m)
   1.306 +	    {
   1.307 +		d = in (src, m);
   1.308 +		*dst = over (d, *dst);
   1.309 +	    }
   1.310 +	    dst++;
   1.311 +	}
   1.312 +    }
   1.313 +}
   1.314 +
   1.315 +static void
   1.316 +fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
   1.317 +				   pixman_composite_info_t *info)
   1.318 +{
   1.319 +    PIXMAN_COMPOSITE_ARGS (info);
   1.320 +    uint32_t src, s;
   1.321 +    uint32_t    *dst_line, *dst, d;
   1.322 +    uint32_t    *mask_line, *mask, ma;
   1.323 +    int dst_stride, mask_stride;
   1.324 +    int32_t w;
   1.325 +
   1.326 +    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
   1.327 +
   1.328 +    if (src == 0)
   1.329 +	return;
   1.330 +
   1.331 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
   1.332 +    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
   1.333 +
   1.334 +    while (height--)
   1.335 +    {
   1.336 +	dst = dst_line;
   1.337 +	dst_line += dst_stride;
   1.338 +	mask = mask_line;
   1.339 +	mask_line += mask_stride;
   1.340 +	w = width;
   1.341 +
   1.342 +	while (w--)
   1.343 +	{
   1.344 +	    ma = *mask++;
   1.345 +
   1.346 +	    if (ma)
   1.347 +	    {
   1.348 +		d = *dst;
   1.349 +		s = src;
   1.350 +
   1.351 +		UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);
   1.352 +
   1.353 +		*dst = s;
   1.354 +	    }
   1.355 +
   1.356 +	    dst++;
   1.357 +	}
   1.358 +    }
   1.359 +}
   1.360 +
   1.361 +static void
   1.362 +fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
   1.363 +                                    pixman_composite_info_t *info)
   1.364 +{
   1.365 +    PIXMAN_COMPOSITE_ARGS (info);
   1.366 +    uint32_t src, srca, s;
   1.367 +    uint32_t    *dst_line, *dst, d;
   1.368 +    uint32_t    *mask_line, *mask, ma;
   1.369 +    int dst_stride, mask_stride;
   1.370 +    int32_t w;
   1.371 +
   1.372 +    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
   1.373 +
   1.374 +    srca = src >> 24;
   1.375 +    if (src == 0)
   1.376 +	return;
   1.377 +
   1.378 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
   1.379 +    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
   1.380 +
   1.381 +    while (height--)
   1.382 +    {
   1.383 +	dst = dst_line;
   1.384 +	dst_line += dst_stride;
   1.385 +	mask = mask_line;
   1.386 +	mask_line += mask_stride;
   1.387 +	w = width;
   1.388 +
   1.389 +	while (w--)
   1.390 +	{
   1.391 +	    ma = *mask++;
   1.392 +	    if (ma == 0xffffffff)
   1.393 +	    {
   1.394 +		if (srca == 0xff)
   1.395 +		    *dst = src;
   1.396 +		else
   1.397 +		    *dst = over (src, *dst);
   1.398 +	    }
   1.399 +	    else if (ma)
   1.400 +	    {
   1.401 +		d = *dst;
   1.402 +		s = src;
   1.403 +
   1.404 +		UN8x4_MUL_UN8x4 (s, ma);
   1.405 +		UN8x4_MUL_UN8 (ma, srca);
   1.406 +		ma = ~ma;
   1.407 +		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
   1.408 +
   1.409 +		*dst = d;
   1.410 +	    }
   1.411 +
   1.412 +	    dst++;
   1.413 +	}
   1.414 +    }
   1.415 +}
   1.416 +
   1.417 +static void
   1.418 +fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
   1.419 +                              pixman_composite_info_t *info)
   1.420 +{
   1.421 +    PIXMAN_COMPOSITE_ARGS (info);
   1.422 +    uint32_t src, srca;
   1.423 +    uint8_t     *dst_line, *dst;
   1.424 +    uint32_t d;
   1.425 +    uint8_t     *mask_line, *mask, m;
   1.426 +    int dst_stride, mask_stride;
   1.427 +    int32_t w;
   1.428 +
   1.429 +    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
   1.430 +
   1.431 +    srca = src >> 24;
   1.432 +    if (src == 0)
   1.433 +	return;
   1.434 +
   1.435 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
   1.436 +    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
   1.437 +
   1.438 +    while (height--)
   1.439 +    {
   1.440 +	dst = dst_line;
   1.441 +	dst_line += dst_stride;
   1.442 +	mask = mask_line;
   1.443 +	mask_line += mask_stride;
   1.444 +	w = width;
   1.445 +
   1.446 +	while (w--)
   1.447 +	{
   1.448 +	    m = *mask++;
   1.449 +	    if (m == 0xff)
   1.450 +	    {
   1.451 +		if (srca == 0xff)
   1.452 +		{
   1.453 +		    d = src;
   1.454 +		}
   1.455 +		else
   1.456 +		{
   1.457 +		    d = fetch_24 (dst);
   1.458 +		    d = over (src, d);
   1.459 +		}
   1.460 +		store_24 (dst, d);
   1.461 +	    }
   1.462 +	    else if (m)
   1.463 +	    {
   1.464 +		d = over (in (src, m), fetch_24 (dst));
   1.465 +		store_24 (dst, d);
   1.466 +	    }
   1.467 +	    dst += 3;
   1.468 +	}
   1.469 +    }
   1.470 +}
   1.471 +
   1.472 +static void
   1.473 +fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
   1.474 +                              pixman_composite_info_t *info)
   1.475 +{
   1.476 +    PIXMAN_COMPOSITE_ARGS (info);
   1.477 +    uint32_t src, srca;
   1.478 +    uint16_t    *dst_line, *dst;
   1.479 +    uint32_t d;
   1.480 +    uint8_t     *mask_line, *mask, m;
   1.481 +    int dst_stride, mask_stride;
   1.482 +    int32_t w;
   1.483 +
   1.484 +    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
   1.485 +
   1.486 +    srca = src >> 24;
   1.487 +    if (src == 0)
   1.488 +	return;
   1.489 +
   1.490 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
   1.491 +    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
   1.492 +
   1.493 +    while (height--)
   1.494 +    {
   1.495 +	dst = dst_line;
   1.496 +	dst_line += dst_stride;
   1.497 +	mask = mask_line;
   1.498 +	mask_line += mask_stride;
   1.499 +	w = width;
   1.500 +
   1.501 +	while (w--)
   1.502 +	{
   1.503 +	    m = *mask++;
   1.504 +	    if (m == 0xff)
   1.505 +	    {
   1.506 +		if (srca == 0xff)
   1.507 +		{
   1.508 +		    d = src;
   1.509 +		}
   1.510 +		else
   1.511 +		{
   1.512 +		    d = *dst;
   1.513 +		    d = over (src, convert_0565_to_0888 (d));
   1.514 +		}
   1.515 +		*dst = convert_8888_to_0565 (d);
   1.516 +	    }
   1.517 +	    else if (m)
   1.518 +	    {
   1.519 +		d = *dst;
   1.520 +		d = over (in (src, m), convert_0565_to_0888 (d));
   1.521 +		*dst = convert_8888_to_0565 (d);
   1.522 +	    }
   1.523 +	    dst++;
   1.524 +	}
   1.525 +    }
   1.526 +}
   1.527 +
   1.528 +static void
   1.529 +fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
   1.530 +                                    pixman_composite_info_t *info)
   1.531 +{
   1.532 +    PIXMAN_COMPOSITE_ARGS (info);
   1.533 +    uint32_t  src, srca, s;
   1.534 +    uint16_t  src16;
   1.535 +    uint16_t *dst_line, *dst;
   1.536 +    uint32_t  d;
   1.537 +    uint32_t *mask_line, *mask, ma;
   1.538 +    int dst_stride, mask_stride;
   1.539 +    int32_t w;
   1.540 +
   1.541 +    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
   1.542 +
   1.543 +    srca = src >> 24;
   1.544 +    if (src == 0)
   1.545 +	return;
   1.546 +
   1.547 +    src16 = convert_8888_to_0565 (src);
   1.548 +
   1.549 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
   1.550 +    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
   1.551 +
   1.552 +    while (height--)
   1.553 +    {
   1.554 +	dst = dst_line;
   1.555 +	dst_line += dst_stride;
   1.556 +	mask = mask_line;
   1.557 +	mask_line += mask_stride;
   1.558 +	w = width;
   1.559 +
   1.560 +	while (w--)
   1.561 +	{
   1.562 +	    ma = *mask++;
   1.563 +	    if (ma == 0xffffffff)
   1.564 +	    {
   1.565 +		if (srca == 0xff)
   1.566 +		{
   1.567 +		    *dst = src16;
   1.568 +		}
   1.569 +		else
   1.570 +		{
   1.571 +		    d = *dst;
   1.572 +		    d = over (src, convert_0565_to_0888 (d));
   1.573 +		    *dst = convert_8888_to_0565 (d);
   1.574 +		}
   1.575 +	    }
   1.576 +	    else if (ma)
   1.577 +	    {
   1.578 +		d = *dst;
   1.579 +		d = convert_0565_to_0888 (d);
   1.580 +
   1.581 +		s = src;
   1.582 +
   1.583 +		UN8x4_MUL_UN8x4 (s, ma);
   1.584 +		UN8x4_MUL_UN8 (ma, srca);
   1.585 +		ma = ~ma;
   1.586 +		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
   1.587 +
   1.588 +		*dst = convert_8888_to_0565 (d);
   1.589 +	    }
   1.590 +	    dst++;
   1.591 +	}
   1.592 +    }
   1.593 +}
   1.594 +
   1.595 +static void
   1.596 +fast_composite_over_8888_8888 (pixman_implementation_t *imp,
   1.597 +                               pixman_composite_info_t *info)
   1.598 +{
   1.599 +    PIXMAN_COMPOSITE_ARGS (info);
   1.600 +    uint32_t    *dst_line, *dst;
   1.601 +    uint32_t    *src_line, *src, s;
   1.602 +    int dst_stride, src_stride;
   1.603 +    uint8_t a;
   1.604 +    int32_t w;
   1.605 +
   1.606 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
   1.607 +    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
   1.608 +
   1.609 +    while (height--)
   1.610 +    {
   1.611 +	dst = dst_line;
   1.612 +	dst_line += dst_stride;
   1.613 +	src = src_line;
   1.614 +	src_line += src_stride;
   1.615 +	w = width;
   1.616 +
   1.617 +	while (w--)
   1.618 +	{
   1.619 +	    s = *src++;
   1.620 +	    a = s >> 24;
   1.621 +	    if (a == 0xff)
   1.622 +		*dst = s;
   1.623 +	    else if (s)
   1.624 +		*dst = over (s, *dst);
   1.625 +	    dst++;
   1.626 +	}
   1.627 +    }
   1.628 +}
   1.629 +
   1.630 +static void
   1.631 +fast_composite_src_x888_8888 (pixman_implementation_t *imp,
   1.632 +			      pixman_composite_info_t *info)
   1.633 +{
   1.634 +    PIXMAN_COMPOSITE_ARGS (info);
   1.635 +    uint32_t    *dst_line, *dst;
   1.636 +    uint32_t    *src_line, *src;
   1.637 +    int dst_stride, src_stride;
   1.638 +    int32_t w;
   1.639 +
   1.640 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
   1.641 +    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
   1.642 +
   1.643 +    while (height--)
   1.644 +    {
   1.645 +	dst = dst_line;
   1.646 +	dst_line += dst_stride;
   1.647 +	src = src_line;
   1.648 +	src_line += src_stride;
   1.649 +	w = width;
   1.650 +
   1.651 +	while (w--)
   1.652 +	    *dst++ = (*src++) | 0xff000000;
   1.653 +    }
   1.654 +}
   1.655 +
   1.656 +#if 0
   1.657 +static void
   1.658 +fast_composite_over_8888_0888 (pixman_implementation_t *imp,
   1.659 +			       pixman_composite_info_t *info)
   1.660 +{
   1.661 +    PIXMAN_COMPOSITE_ARGS (info);
   1.662 +    uint8_t     *dst_line, *dst;
   1.663 +    uint32_t d;
   1.664 +    uint32_t    *src_line, *src, s;
   1.665 +    uint8_t a;
   1.666 +    int dst_stride, src_stride;
   1.667 +    int32_t w;
   1.668 +
   1.669 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
   1.670 +    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
   1.671 +
   1.672 +    while (height--)
   1.673 +    {
   1.674 +	dst = dst_line;
   1.675 +	dst_line += dst_stride;
   1.676 +	src = src_line;
   1.677 +	src_line += src_stride;
   1.678 +	w = width;
   1.679 +
   1.680 +	while (w--)
   1.681 +	{
   1.682 +	    s = *src++;
   1.683 +	    a = s >> 24;
   1.684 +	    if (a)
   1.685 +	    {
   1.686 +		if (a == 0xff)
   1.687 +		    d = s;
   1.688 +		else
   1.689 +		    d = over (s, fetch_24 (dst));
   1.690 +
   1.691 +		store_24 (dst, d);
   1.692 +	    }
   1.693 +	    dst += 3;
   1.694 +	}
   1.695 +    }
   1.696 +}
   1.697 +#endif
   1.698 +
   1.699 +static void
   1.700 +fast_composite_over_8888_0565 (pixman_implementation_t *imp,
   1.701 +                               pixman_composite_info_t *info)
   1.702 +{
   1.703 +    PIXMAN_COMPOSITE_ARGS (info);
   1.704 +    uint16_t    *dst_line, *dst;
   1.705 +    uint32_t d;
   1.706 +    uint32_t    *src_line, *src, s;
   1.707 +    uint8_t a;
   1.708 +    int dst_stride, src_stride;
   1.709 +    int32_t w;
   1.710 +
   1.711 +    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
   1.712 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
   1.713 +
   1.714 +    while (height--)
   1.715 +    {
   1.716 +	dst = dst_line;
   1.717 +	dst_line += dst_stride;
   1.718 +	src = src_line;
   1.719 +	src_line += src_stride;
   1.720 +	w = width;
   1.721 +
   1.722 +	while (w--)
   1.723 +	{
   1.724 +	    s = *src++;
   1.725 +	    a = s >> 24;
   1.726 +	    if (s)
   1.727 +	    {
   1.728 +		if (a == 0xff)
   1.729 +		{
   1.730 +		    d = s;
   1.731 +		}
   1.732 +		else
   1.733 +		{
   1.734 +		    d = *dst;
   1.735 +		    d = over (s, convert_0565_to_0888 (d));
   1.736 +		}
   1.737 +		*dst = convert_8888_to_0565 (d);
   1.738 +	    }
   1.739 +	    dst++;
   1.740 +	}
   1.741 +    }
   1.742 +}
   1.743 +
   1.744 +static void
   1.745 +fast_composite_add_8_8 (pixman_implementation_t *imp,
   1.746 +			pixman_composite_info_t *info)
   1.747 +{
   1.748 +    PIXMAN_COMPOSITE_ARGS (info);
   1.749 +    uint8_t     *dst_line, *dst;
   1.750 +    uint8_t     *src_line, *src;
   1.751 +    int dst_stride, src_stride;
   1.752 +    int32_t w;
   1.753 +    uint8_t s, d;
   1.754 +    uint16_t t;
   1.755 +
   1.756 +    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
   1.757 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
   1.758 +
   1.759 +    while (height--)
   1.760 +    {
   1.761 +	dst = dst_line;
   1.762 +	dst_line += dst_stride;
   1.763 +	src = src_line;
   1.764 +	src_line += src_stride;
   1.765 +	w = width;
   1.766 +
   1.767 +	while (w--)
   1.768 +	{
   1.769 +	    s = *src++;
   1.770 +	    if (s)
   1.771 +	    {
   1.772 +		if (s != 0xff)
   1.773 +		{
   1.774 +		    d = *dst;
   1.775 +		    t = d + s;
   1.776 +		    s = t | (0 - (t >> 8));
   1.777 +		}
   1.778 +		*dst = s;
   1.779 +	    }
   1.780 +	    dst++;
   1.781 +	}
   1.782 +    }
   1.783 +}
   1.784 +
   1.785 +static void
   1.786 +fast_composite_add_0565_0565 (pixman_implementation_t *imp,
   1.787 +                              pixman_composite_info_t *info)
   1.788 +{
   1.789 +    PIXMAN_COMPOSITE_ARGS (info);
   1.790 +    uint16_t    *dst_line, *dst;
   1.791 +    uint32_t	d;
   1.792 +    uint16_t    *src_line, *src;
   1.793 +    uint32_t	s;
   1.794 +    int dst_stride, src_stride;
   1.795 +    int32_t w;
   1.796 +
   1.797 +    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1);
   1.798 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
   1.799 +
   1.800 +    while (height--)
   1.801 +    {
   1.802 +	dst = dst_line;
   1.803 +	dst_line += dst_stride;
   1.804 +	src = src_line;
   1.805 +	src_line += src_stride;
   1.806 +	w = width;
   1.807 +
   1.808 +	while (w--)
   1.809 +	{
   1.810 +	    s = *src++;
   1.811 +	    if (s)
   1.812 +	    {
   1.813 +		d = *dst;
   1.814 +		s = convert_0565_to_8888 (s);
   1.815 +		if (d)
   1.816 +		{
   1.817 +		    d = convert_0565_to_8888 (d);
   1.818 +		    UN8x4_ADD_UN8x4 (s, d);
   1.819 +		}
   1.820 +		*dst = convert_8888_to_0565 (s);
   1.821 +	    }
   1.822 +	    dst++;
   1.823 +	}
   1.824 +    }
   1.825 +}
   1.826 +
   1.827 +static void
   1.828 +fast_composite_add_8888_8888 (pixman_implementation_t *imp,
   1.829 +                              pixman_composite_info_t *info)
   1.830 +{
   1.831 +    PIXMAN_COMPOSITE_ARGS (info);
   1.832 +    uint32_t    *dst_line, *dst;
   1.833 +    uint32_t    *src_line, *src;
   1.834 +    int dst_stride, src_stride;
   1.835 +    int32_t w;
   1.836 +    uint32_t s, d;
   1.837 +
   1.838 +    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
   1.839 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
   1.840 +
   1.841 +    while (height--)
   1.842 +    {
   1.843 +	dst = dst_line;
   1.844 +	dst_line += dst_stride;
   1.845 +	src = src_line;
   1.846 +	src_line += src_stride;
   1.847 +	w = width;
   1.848 +
   1.849 +	while (w--)
   1.850 +	{
   1.851 +	    s = *src++;
   1.852 +	    if (s)
   1.853 +	    {
   1.854 +		if (s != 0xffffffff)
   1.855 +		{
   1.856 +		    d = *dst;
   1.857 +		    if (d)
   1.858 +			UN8x4_ADD_UN8x4 (s, d);
   1.859 +		}
   1.860 +		*dst = s;
   1.861 +	    }
   1.862 +	    dst++;
   1.863 +	}
   1.864 +    }
   1.865 +}
   1.866 +
   1.867 +static void
   1.868 +fast_composite_add_n_8_8 (pixman_implementation_t *imp,
   1.869 +			  pixman_composite_info_t *info)
   1.870 +{
   1.871 +    PIXMAN_COMPOSITE_ARGS (info);
   1.872 +    uint8_t     *dst_line, *dst;
   1.873 +    uint8_t     *mask_line, *mask;
   1.874 +    int dst_stride, mask_stride;
   1.875 +    int32_t w;
   1.876 +    uint32_t src;
   1.877 +    uint8_t sa;
   1.878 +
   1.879 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
   1.880 +    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
   1.881 +    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
   1.882 +    sa = (src >> 24);
   1.883 +
   1.884 +    while (height--)
   1.885 +    {
   1.886 +	dst = dst_line;
   1.887 +	dst_line += dst_stride;
   1.888 +	mask = mask_line;
   1.889 +	mask_line += mask_stride;
   1.890 +	w = width;
   1.891 +
   1.892 +	while (w--)
   1.893 +	{
   1.894 +	    uint16_t tmp;
   1.895 +	    uint16_t a;
   1.896 +	    uint32_t m, d;
   1.897 +	    uint32_t r;
   1.898 +
   1.899 +	    a = *mask++;
   1.900 +	    d = *dst;
   1.901 +
   1.902 +	    m = MUL_UN8 (sa, a, tmp);
   1.903 +	    r = ADD_UN8 (m, d, tmp);
   1.904 +
   1.905 +	    *dst++ = r;
   1.906 +	}
   1.907 +    }
   1.908 +}
   1.909 +
   1.910 +#ifdef WORDS_BIGENDIAN
   1.911 +#define CREATE_BITMASK(n) (0x80000000 >> (n))
   1.912 +#define UPDATE_BITMASK(n) ((n) >> 1)
   1.913 +#else
   1.914 +#define CREATE_BITMASK(n) (1 << (n))
   1.915 +#define UPDATE_BITMASK(n) ((n) << 1)
   1.916 +#endif
   1.917 +
   1.918 +#define TEST_BIT(p, n)					\
   1.919 +    (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
   1.920 +#define SET_BIT(p, n)							\
   1.921 +    do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0);
   1.922 +
   1.923 +static void
   1.924 +fast_composite_add_1_1 (pixman_implementation_t *imp,
   1.925 +			pixman_composite_info_t *info)
   1.926 +{
   1.927 +    PIXMAN_COMPOSITE_ARGS (info);
   1.928 +    uint32_t     *dst_line, *dst;
   1.929 +    uint32_t     *src_line, *src;
   1.930 +    int           dst_stride, src_stride;
   1.931 +    int32_t       w;
   1.932 +
   1.933 +    PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
   1.934 +                           src_stride, src_line, 1);
   1.935 +    PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t,
   1.936 +                           dst_stride, dst_line, 1);
   1.937 +
   1.938 +    while (height--)
   1.939 +    {
   1.940 +	dst = dst_line;
   1.941 +	dst_line += dst_stride;
   1.942 +	src = src_line;
   1.943 +	src_line += src_stride;
   1.944 +	w = width;
   1.945 +
   1.946 +	while (w--)
   1.947 +	{
   1.948 +	    /*
   1.949 +	     * TODO: improve performance by processing uint32_t data instead
   1.950 +	     *       of individual bits
   1.951 +	     */
   1.952 +	    if (TEST_BIT (src, src_x + w))
   1.953 +		SET_BIT (dst, dest_x + w);
   1.954 +	}
   1.955 +    }
   1.956 +}
   1.957 +
   1.958 +static void
   1.959 +fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
   1.960 +                              pixman_composite_info_t *info)
   1.961 +{
   1.962 +    PIXMAN_COMPOSITE_ARGS (info);
   1.963 +    uint32_t     src, srca;
   1.964 +    uint32_t    *dst, *dst_line;
   1.965 +    uint32_t    *mask, *mask_line;
   1.966 +    int          mask_stride, dst_stride;
   1.967 +    uint32_t     bitcache, bitmask;
   1.968 +    int32_t      w;
   1.969 +
   1.970 +    if (width <= 0)
   1.971 +	return;
   1.972 +
   1.973 +    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
   1.974 +    srca = src >> 24;
   1.975 +    if (src == 0)
   1.976 +	return;
   1.977 +
   1.978 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t,
   1.979 +                           dst_stride, dst_line, 1);
   1.980 +    PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
   1.981 +                           mask_stride, mask_line, 1);
   1.982 +    mask_line += mask_x >> 5;
   1.983 +
   1.984 +    if (srca == 0xff)
   1.985 +    {
   1.986 +	while (height--)
   1.987 +	{
   1.988 +	    dst = dst_line;
   1.989 +	    dst_line += dst_stride;
   1.990 +	    mask = mask_line;
   1.991 +	    mask_line += mask_stride;
   1.992 +	    w = width;
   1.993 +
   1.994 +	    bitcache = *mask++;
   1.995 +	    bitmask = CREATE_BITMASK (mask_x & 31);
   1.996 +
   1.997 +	    while (w--)
   1.998 +	    {
   1.999 +		if (bitmask == 0)
  1.1000 +		{
  1.1001 +		    bitcache = *mask++;
  1.1002 +		    bitmask = CREATE_BITMASK (0);
  1.1003 +		}
  1.1004 +		if (bitcache & bitmask)
  1.1005 +		    *dst = src;
  1.1006 +		bitmask = UPDATE_BITMASK (bitmask);
  1.1007 +		dst++;
  1.1008 +	    }
  1.1009 +	}
  1.1010 +    }
  1.1011 +    else
  1.1012 +    {
  1.1013 +	while (height--)
  1.1014 +	{
  1.1015 +	    dst = dst_line;
  1.1016 +	    dst_line += dst_stride;
  1.1017 +	    mask = mask_line;
  1.1018 +	    mask_line += mask_stride;
  1.1019 +	    w = width;
  1.1020 +
  1.1021 +	    bitcache = *mask++;
  1.1022 +	    bitmask = CREATE_BITMASK (mask_x & 31);
  1.1023 +
  1.1024 +	    while (w--)
  1.1025 +	    {
  1.1026 +		if (bitmask == 0)
  1.1027 +		{
  1.1028 +		    bitcache = *mask++;
  1.1029 +		    bitmask = CREATE_BITMASK (0);
  1.1030 +		}
  1.1031 +		if (bitcache & bitmask)
  1.1032 +		    *dst = over (src, *dst);
  1.1033 +		bitmask = UPDATE_BITMASK (bitmask);
  1.1034 +		dst++;
  1.1035 +	    }
  1.1036 +	}
  1.1037 +    }
  1.1038 +}
  1.1039 +
  1.1040 +static void
  1.1041 +fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
  1.1042 +                              pixman_composite_info_t *info)
  1.1043 +{
  1.1044 +    PIXMAN_COMPOSITE_ARGS (info);
  1.1045 +    uint32_t     src, srca;
  1.1046 +    uint16_t    *dst, *dst_line;
  1.1047 +    uint32_t    *mask, *mask_line;
  1.1048 +    int          mask_stride, dst_stride;
  1.1049 +    uint32_t     bitcache, bitmask;
  1.1050 +    int32_t      w;
  1.1051 +    uint32_t     d;
  1.1052 +    uint16_t     src565;
  1.1053 +
  1.1054 +    if (width <= 0)
  1.1055 +	return;
  1.1056 +
  1.1057 +    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
  1.1058 +    srca = src >> 24;
  1.1059 +    if (src == 0)
  1.1060 +	return;
  1.1061 +
  1.1062 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t,
  1.1063 +                           dst_stride, dst_line, 1);
  1.1064 +    PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
  1.1065 +                           mask_stride, mask_line, 1);
  1.1066 +    mask_line += mask_x >> 5;
  1.1067 +
  1.1068 +    if (srca == 0xff)
  1.1069 +    {
  1.1070 +	src565 = convert_8888_to_0565 (src);
  1.1071 +	while (height--)
  1.1072 +	{
  1.1073 +	    dst = dst_line;
  1.1074 +	    dst_line += dst_stride;
  1.1075 +	    mask = mask_line;
  1.1076 +	    mask_line += mask_stride;
  1.1077 +	    w = width;
  1.1078 +
  1.1079 +	    bitcache = *mask++;
  1.1080 +	    bitmask = CREATE_BITMASK (mask_x & 31);
  1.1081 +
  1.1082 +	    while (w--)
  1.1083 +	    {
  1.1084 +		if (bitmask == 0)
  1.1085 +		{
  1.1086 +		    bitcache = *mask++;
  1.1087 +		    bitmask = CREATE_BITMASK (0);
  1.1088 +		}
  1.1089 +		if (bitcache & bitmask)
  1.1090 +		    *dst = src565;
  1.1091 +		bitmask = UPDATE_BITMASK (bitmask);
  1.1092 +		dst++;
  1.1093 +	    }
  1.1094 +	}
  1.1095 +    }
  1.1096 +    else
  1.1097 +    {
  1.1098 +	while (height--)
  1.1099 +	{
  1.1100 +	    dst = dst_line;
  1.1101 +	    dst_line += dst_stride;
  1.1102 +	    mask = mask_line;
  1.1103 +	    mask_line += mask_stride;
  1.1104 +	    w = width;
  1.1105 +
  1.1106 +	    bitcache = *mask++;
  1.1107 +	    bitmask = CREATE_BITMASK (mask_x & 31);
  1.1108 +
  1.1109 +	    while (w--)
  1.1110 +	    {
  1.1111 +		if (bitmask == 0)
  1.1112 +		{
  1.1113 +		    bitcache = *mask++;
  1.1114 +		    bitmask = CREATE_BITMASK (0);
  1.1115 +		}
  1.1116 +		if (bitcache & bitmask)
  1.1117 +		{
  1.1118 +		    d = over (src, convert_0565_to_0888 (*dst));
  1.1119 +		    *dst = convert_8888_to_0565 (d);
  1.1120 +		}
  1.1121 +		bitmask = UPDATE_BITMASK (bitmask);
  1.1122 +		dst++;
  1.1123 +	    }
  1.1124 +	}
  1.1125 +    }
  1.1126 +}
  1.1127 +
  1.1128 +/*
  1.1129 + * Simple bitblt
  1.1130 + */
  1.1131 +
  1.1132 +static void
  1.1133 +fast_composite_solid_fill (pixman_implementation_t *imp,
  1.1134 +                           pixman_composite_info_t *info)
  1.1135 +{
  1.1136 +    PIXMAN_COMPOSITE_ARGS (info);
  1.1137 +    uint32_t src;
  1.1138 +
  1.1139 +    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
  1.1140 +
  1.1141 +    if (dest_image->bits.format == PIXMAN_a1)
  1.1142 +    {
  1.1143 +	src = src >> 31;
  1.1144 +    }
  1.1145 +    else if (dest_image->bits.format == PIXMAN_a8)
  1.1146 +    {
  1.1147 +	src = src >> 24;
  1.1148 +    }
  1.1149 +    else if (dest_image->bits.format == PIXMAN_r5g6b5 ||
  1.1150 +             dest_image->bits.format == PIXMAN_b5g6r5)
  1.1151 +    {
  1.1152 +	src = convert_8888_to_0565 (src);
  1.1153 +    }
  1.1154 +
  1.1155 +    pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
  1.1156 +                 PIXMAN_FORMAT_BPP (dest_image->bits.format),
  1.1157 +                 dest_x, dest_y,
  1.1158 +                 width, height,
  1.1159 +                 src);
  1.1160 +}
  1.1161 +
  1.1162 +static void
  1.1163 +fast_composite_src_memcpy (pixman_implementation_t *imp,
  1.1164 +			   pixman_composite_info_t *info)
  1.1165 +{
  1.1166 +    PIXMAN_COMPOSITE_ARGS (info);
  1.1167 +    int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8;
  1.1168 +    uint32_t n_bytes = width * bpp;
  1.1169 +    int dst_stride, src_stride;
  1.1170 +    uint8_t    *dst;
  1.1171 +    uint8_t    *src;
  1.1172 +
  1.1173 +    src_stride = src_image->bits.rowstride * 4;
  1.1174 +    dst_stride = dest_image->bits.rowstride * 4;
  1.1175 +
  1.1176 +    src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
  1.1177 +    dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
  1.1178 +
  1.1179 +    while (height--)
  1.1180 +    {
  1.1181 +	memcpy (dst, src, n_bytes);
  1.1182 +
  1.1183 +	dst += dst_stride;
  1.1184 +	src += src_stride;
  1.1185 +    }
  1.1186 +}
  1.1187 +
  /*
   * Nearest-neighbour scaling instantiations.
   *
   * Each FAST_NEAREST line passes: a name stem, the source and destination
   * format tags, the source and destination pixel C types, the operator
   * (SRC or OVER) and a mode (COVER, NONE, PAD or NORMAL).  The macro itself
   * is defined outside this part of the file; presumably it emits one
   * scaled-composite function per line -- confirm against its definition.
   *
   * Two gaps in the matrix are visible in this list: x888_8888 SRC has no
   * NONE entry, and 565_565 SRC appears only with NORMAL here (its COVER,
   * NONE and PAD variants are instantiated further down through
   * FAST_NEAREST_MAINLOOP with scaled_nearest_scanline_565_565_SRC).
   */
  1.1188 +FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
  1.1189 +FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
  1.1190 +FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
  1.1191 +FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
  1.1192 +FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER)
  1.1193 +FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD)
  1.1194 +FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL)
  1.1195 +FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
  1.1196 +FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
  1.1197 +FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
  1.1198 +FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
  1.1199 +FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
  1.1200 +FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
  1.1201 +FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
  1.1202 +FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
  1.1203 +FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
  1.1204 +FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
  1.1205 +FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
  1.1206 +FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
  1.1207 +FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
  1.1208 +
  1.1209 +static force_inline void
  1.1210 +scaled_bilinear_scanline_8888_565_OVER (uint16_t *       dst,
  1.1211 +                                        const uint32_t * mask,
  1.1212 +                                        const uint32_t * src_top,
  1.1213 +                                        const uint32_t * src_bottom,
  1.1214 +                                        int32_t          w,
  1.1215 +                                        int              wt,
  1.1216 +                                        int              wb,
  1.1217 +                                        pixman_fixed_t   vx,
  1.1218 +                                        pixman_fixed_t   unit_x,
  1.1219 +                                        pixman_fixed_t   max_vx,
  1.1220 +                                        pixman_bool_t    zero_src)
  1.1221 +{
  1.1222 +    while ((w -= 1) >= 0)
  1.1223 +    {
  1.1224 +	uint32_t tl = src_top [pixman_fixed_to_int (vx)];
  1.1225 +	uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1];
  1.1226 +	uint32_t bl = src_bottom [pixman_fixed_to_int (vx)];
  1.1227 +	uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
  1.1228 +	uint32_t src, result;
  1.1229 +	uint16_t d;
  1.1230 +	d = *dst;
  1.1231 +	src = bilinear_interpolation (tl, tr,
  1.1232 +				      bl, br,
  1.1233 +				      pixman_fixed_to_bilinear_weight(vx),
  1.1234 +				      wb);
  1.1235 +	vx += unit_x;
  1.1236 +	result = over (src, convert_0565_to_0888 (d));
  1.1237 +	*dst++ = convert_8888_to_0565 (result);
  1.1238 +    }
  1.1239 +}
  1.1240 +
  1.1241 +static force_inline void
  1.1242 +scaled_bilinear_scanline_8888_8888_OVER (uint32_t *       dst,
  1.1243 +                                         const uint32_t * mask,
  1.1244 +                                         const uint32_t * src_top,
  1.1245 +                                         const uint32_t * src_bottom,
  1.1246 +                                         int32_t          w,
  1.1247 +                                         int              wt,
  1.1248 +                                         int              wb,
  1.1249 +                                         pixman_fixed_t   vx,
  1.1250 +                                         pixman_fixed_t   unit_x,
  1.1251 +                                         pixman_fixed_t   max_vx,
  1.1252 +                                         pixman_bool_t    zero_src)
  1.1253 +{
  1.1254 +    while ((w -= 1) >= 0)
  1.1255 +    {
  1.1256 +	uint32_t tl = src_top [pixman_fixed_to_int (vx)];
  1.1257 +	uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1];
  1.1258 +	uint32_t bl = src_bottom [pixman_fixed_to_int (vx)];
  1.1259 +	uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
  1.1260 +	uint32_t src;
  1.1261 +	uint32_t d;
  1.1262 +	uint32_t result;
  1.1263 +	d = *dst;
  1.1264 +	src = bilinear_interpolation (tl, tr,
  1.1265 +				      bl, br,
  1.1266 +				      pixman_fixed_to_bilinear_weight(vx),
  1.1267 +				      wb);
  1.1268 +	vx += unit_x;
  1.1269 +	*dst++ = over (src, d);
  1.1270 +    }
  1.1271 +}
  1.1272 +
  1.1273 +#ifndef LOWER_QUALITY_INTERPOLATION
  1.1274 +
  1.1275 +static force_inline void
  1.1276 +scaled_bilinear_scanline_565_565_SRC (uint16_t *       dst,
  1.1277 +				      const uint32_t * mask,
  1.1278 +				      const uint16_t * src_top,
  1.1279 +				      const uint16_t * src_bottom,
  1.1280 +				      int32_t          w,
  1.1281 +				      int              wt,
  1.1282 +				      int              wb,
  1.1283 +				      pixman_fixed_t   vx,
  1.1284 +				      pixman_fixed_t   unit_x,
  1.1285 +				      pixman_fixed_t   max_vx,
  1.1286 +				      pixman_bool_t    zero_src)
  1.1287 +{
  1.1288 +    while ((w -= 1) >= 0)
  1.1289 +    {
  1.1290 +	uint16_t tl = src_top [pixman_fixed_to_int (vx)];
  1.1291 +	uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1];
  1.1292 +	uint16_t bl = src_bottom [pixman_fixed_to_int (vx)];
  1.1293 +	uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
  1.1294 +	uint32_t d;
  1.1295 +	d = bilinear_interpolation(convert_0565_to_8888 (tl),
  1.1296 +				   convert_0565_to_8888 (tr),
  1.1297 +				   convert_0565_to_8888 (bl),
  1.1298 +				   convert_0565_to_8888 (br),
  1.1299 +				   pixman_fixed_to_bilinear_weight (vx),
  1.1300 +				   wb);
  1.1301 +	vx += unit_x;
  1.1302 +	*dst++ = convert_8888_to_0565 (d);
  1.1303 +    }
  1.1304 +}
  1.1305 +
  1.1306 +#else
  1.1307 +
  1.1308 +/* This is a clever low resolution bilinear interpolation inspired by the code
  1.1309 +   in Skia */
  1.1310 +
  1.1311 +/* This takes the green component from the 565 representation and moves it:
  1.1312 +   00000000 00000000 rrrrrggg gggbbbbb
  1.1313 +
  1.1314 +   00000ggg ggg00000 rrrrr000 000bbbbb
  1.1315 +
  1.1316 +   This gives us 5 extra bits of space before each component to let us do
  1.1317 +   SWAR style optimizations
  1.1318 +*/
  1.1319 +
  1.1320 +#define GREEN_MASK (((1 << 6) - 1) << 5)
  1.1321 +
  1.1322 +static inline uint32_t
  1.1323 +expand_rgb_565 (uint16_t c) {
  1.1324 +    return ((c & GREEN_MASK) << 16) | (c & ~GREEN_MASK);
  1.1325 +}
  1.1326 +
  1.1327 +static inline uint16_t
  1.1328 +compact_rgb_565 (uint32_t c) {
  1.1329 +    return ((c >> 16) & GREEN_MASK) | (c & ~GREEN_MASK);
  1.1330 +}
  1.1331 +
  1.1332 +static inline uint16_t
  1.1333 +bilinear_interpolation_565(uint16_t tl, uint16_t tr,
  1.1334 +			   uint16_t bl, uint16_t br,
  1.1335 +			   int x, int y)
  1.1336 +{
  1.1337 +    int xy;
  1.1338 +    uint32_t a00 = expand_rgb_565 (tl);
  1.1339 +    uint32_t a01 = expand_rgb_565 (tr);
  1.1340 +    uint32_t a10 = expand_rgb_565 (bl);
  1.1341 +    uint32_t a11 = expand_rgb_565 (br);
  1.1342 +
  1.1343 +    xy = (x * y) >> 3;
  1.1344 +    return compact_rgb_565 ((a00 * (32 - 2*y - 2*x + xy) +
  1.1345 +			     a01 * (2*x - xy) +
  1.1346 +			     a10 * (2*y - xy) +
  1.1347 +			     a11 * xy) >> 5);
  1.1348 +}
  1.1349 +
  1.1350 +static force_inline void
  1.1351 +scaled_bilinear_scanline_565_565_SRC (uint16_t *       dst,
  1.1352 +				      const uint32_t * mask,
  1.1353 +				      const uint16_t * src_top,
  1.1354 +				      const uint16_t * src_bottom,
  1.1355 +				      int32_t          w,
  1.1356 +				      int              wt,
  1.1357 +				      int              wb,
  1.1358 +				      pixman_fixed_t   vx,
  1.1359 +				      pixman_fixed_t   unit_x,
  1.1360 +				      pixman_fixed_t   max_vx,
  1.1361 +				      pixman_bool_t    zero_src)
  1.1362 +{
  1.1363 +    while ((w -= 1) >= 0)
  1.1364 +    {
  1.1365 +	uint16_t tl = src_top [pixman_fixed_to_int (vx)];
  1.1366 +	uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1];
  1.1367 +	uint16_t bl = src_bottom [pixman_fixed_to_int (vx)];
  1.1368 +	uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
  1.1369 +
  1.1370 +        uint16_t d = bilinear_interpolation_565 (tl, tr, bl, br,
  1.1371 +						 pixman_fixed_to_bilinear_weight(vx),
  1.1372 +						 wb);
  1.1373 +        vx += unit_x;
  1.1374 +        *dst++ = d;
  1.1375 +    }
  1.1376 +}
  1.1377 +
  1.1378 +#endif
  1.1379 +
  /*
   * Bilinear scaling main-loop instantiations.
   *
   * Every invocation passes: the name of the variant, the scanline worker
   * defined above, NULL in the third slot, three C types, a mode (COVER,
   * PAD, NONE or NORMAL) and FLAG_NONE.  The three types line up with the
   * worker's source, mask and destination pointer types, in that order.
   *
   * NOTE(review): the macro is defined outside this part of the file, so
   * the meaning of the NULL argument and of FLAG_NONE cannot be confirmed
   * from here -- check the macro definition.
   *
   * Three groups of four follow: 565 -> 565 SRC, 8888 -> 565 OVER and
   * 8888 -> 8888 OVER.
   */
  1.1380 +FAST_BILINEAR_MAINLOOP_COMMON (565_565_cover_SRC,
  1.1381 +			       scaled_bilinear_scanline_565_565_SRC, NULL,
  1.1382 +			       uint16_t, uint32_t, uint16_t,
  1.1383 +			       COVER, FLAG_NONE)
  1.1384 +FAST_BILINEAR_MAINLOOP_COMMON (565_565_pad_SRC,
  1.1385 +			       scaled_bilinear_scanline_565_565_SRC, NULL,
  1.1386 +			       uint16_t, uint32_t, uint16_t,
  1.1387 +			       PAD, FLAG_NONE)
  1.1388 +FAST_BILINEAR_MAINLOOP_COMMON (565_565_none_SRC,
  1.1389 +			       scaled_bilinear_scanline_565_565_SRC, NULL,
  1.1390 +			       uint16_t, uint32_t, uint16_t,
  1.1391 +			       NONE, FLAG_NONE)
  1.1392 +FAST_BILINEAR_MAINLOOP_COMMON (565_565_normal_SRC,
  1.1393 +			       scaled_bilinear_scanline_565_565_SRC, NULL,
  1.1394 +			       uint16_t, uint32_t, uint16_t,
  1.1395 +			       NORMAL, FLAG_NONE)
  1.1396 +
  1.1397 +FAST_BILINEAR_MAINLOOP_COMMON (8888_565_cover_OVER,
  1.1398 +			       scaled_bilinear_scanline_8888_565_OVER, NULL,
  1.1399 +			       uint32_t, uint32_t, uint16_t,
  1.1400 +			       COVER, FLAG_NONE)
  1.1401 +FAST_BILINEAR_MAINLOOP_COMMON (8888_565_pad_OVER,
  1.1402 +			       scaled_bilinear_scanline_8888_565_OVER, NULL,
  1.1403 +			       uint32_t, uint32_t, uint16_t,
  1.1404 +			       PAD, FLAG_NONE)
  1.1405 +FAST_BILINEAR_MAINLOOP_COMMON (8888_565_none_OVER,
  1.1406 +			       scaled_bilinear_scanline_8888_565_OVER, NULL,
  1.1407 +			       uint32_t, uint32_t, uint16_t,
  1.1408 +			       NONE, FLAG_NONE)
  1.1409 +FAST_BILINEAR_MAINLOOP_COMMON (8888_565_normal_OVER,
  1.1410 +			       scaled_bilinear_scanline_8888_565_OVER, NULL,
  1.1411 +			       uint32_t, uint32_t, uint16_t,
  1.1412 +			       NORMAL, FLAG_NONE)
  1.1413 +
  1.1414 +FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_cover_OVER,
  1.1415 +			       scaled_bilinear_scanline_8888_8888_OVER, NULL,
  1.1416 +			       uint32_t, uint32_t, uint32_t,
  1.1417 +			       COVER, FLAG_NONE)
  1.1418 +FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_pad_OVER,
  1.1419 +			       scaled_bilinear_scanline_8888_8888_OVER, NULL,
  1.1420 +			       uint32_t, uint32_t, uint32_t,
  1.1421 +			       PAD, FLAG_NONE)
  1.1422 +FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_none_OVER,
  1.1423 +			       scaled_bilinear_scanline_8888_8888_OVER, NULL,
  1.1424 +			       uint32_t, uint32_t, uint32_t,
  1.1425 +			       NONE, FLAG_NONE)
  1.1426 +FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_normal_OVER,
  1.1427 +			       scaled_bilinear_scanline_8888_8888_OVER, NULL,
  1.1428 +			       uint32_t, uint32_t, uint32_t,
  1.1429 +			       NORMAL, FLAG_NONE)
  1.1430 +
  1.1431 +#define REPEAT_MIN_WIDTH    32
  1.1432 +
  1.1433 +static void
  1.1434 +fast_composite_tiled_repeat (pixman_implementation_t *imp,
  1.1435 +			     pixman_composite_info_t *info)
  1.1436 +{
  1.1437 +    PIXMAN_COMPOSITE_ARGS (info);
  1.1438 +    pixman_composite_func_t func;
  1.1439 +    pixman_format_code_t mask_format;
  1.1440 +    uint32_t src_flags, mask_flags;
  1.1441 +    int32_t sx, sy;
  1.1442 +    int32_t width_remain;
  1.1443 +    int32_t num_pixels;
  1.1444 +    int32_t src_width;
  1.1445 +    int32_t i, j;
  1.1446 +    pixman_image_t extended_src_image;
  1.1447 +    uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
  1.1448 +    pixman_bool_t need_src_extension;
  1.1449 +    uint32_t *src_line;
  1.1450 +    int32_t src_stride;
  1.1451 +    int32_t src_bpp;
  1.1452 +    pixman_composite_info_t info2 = *info;
  1.1453 +
  1.1454 +    src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |
  1.1455 +		    FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
  1.1456 +
  1.1457 +    if (mask_image)
  1.1458 +    {
  1.1459 +	mask_format = mask_image->common.extended_format_code;
  1.1460 +	mask_flags = info->mask_flags;
  1.1461 +    }
  1.1462 +    else
  1.1463 +    {
  1.1464 +	mask_format = PIXMAN_null;
  1.1465 +	mask_flags = FAST_PATH_IS_OPAQUE;
  1.1466 +    }
  1.1467 +
  1.1468 +    _pixman_implementation_lookup_composite (
  1.1469 +	imp->toplevel, info->op,
  1.1470 +	src_image->common.extended_format_code, src_flags,
  1.1471 +	mask_format, mask_flags,
  1.1472 +	dest_image->common.extended_format_code, info->dest_flags,
  1.1473 +	&imp, &func);
  1.1474 +
  1.1475 +    src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);
  1.1476 +
  1.1477 +    if (src_image->bits.width < REPEAT_MIN_WIDTH		&&
  1.1478 +	(src_bpp == 32 || src_bpp == 16 || src_bpp == 8)	&&
  1.1479 +	!src_image->bits.indexed)
  1.1480 +    {
  1.1481 +	sx = src_x;
  1.1482 +	sx = MOD (sx, src_image->bits.width);
  1.1483 +	sx += width;
  1.1484 +	src_width = 0;
  1.1485 +
  1.1486 +	while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
  1.1487 +	    src_width += src_image->bits.width;
  1.1488 +
  1.1489 +	src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);
  1.1490 +
  1.1491 +	/* Initialize/validate stack-allocated temporary image */
  1.1492 +	_pixman_bits_image_init (&extended_src_image, src_image->bits.format,
  1.1493 +				 src_width, 1, &extended_src[0], src_stride,
  1.1494 +				 FALSE);
  1.1495 +	_pixman_image_validate (&extended_src_image);
  1.1496 +
  1.1497 +	info2.src_image = &extended_src_image;
  1.1498 +	need_src_extension = TRUE;
  1.1499 +    }
  1.1500 +    else
  1.1501 +    {
  1.1502 +	src_width = src_image->bits.width;
  1.1503 +	need_src_extension = FALSE;
  1.1504 +    }
  1.1505 +
  1.1506 +    sx = src_x;
  1.1507 +    sy = src_y;
  1.1508 +
  1.1509 +    while (--height >= 0)
  1.1510 +    {
  1.1511 +	sx = MOD (sx, src_width);
  1.1512 +	sy = MOD (sy, src_image->bits.height);
  1.1513 +
  1.1514 +	if (need_src_extension)
  1.1515 +	{
  1.1516 +	    if (src_bpp == 32)
  1.1517 +	    {
  1.1518 +		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);
  1.1519 +
  1.1520 +		for (i = 0; i < src_width; )
  1.1521 +		{
  1.1522 +		    for (j = 0; j < src_image->bits.width; j++, i++)
  1.1523 +			extended_src[i] = src_line[j];
  1.1524 +		}
  1.1525 +	    }
  1.1526 +	    else if (src_bpp == 16)
  1.1527 +	    {
  1.1528 +		uint16_t *src_line_16;
  1.1529 +
  1.1530 +		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
  1.1531 +				       src_line_16, 1);
  1.1532 +		src_line = (uint32_t*)src_line_16;
  1.1533 +
  1.1534 +		for (i = 0; i < src_width; )
  1.1535 +		{
  1.1536 +		    for (j = 0; j < src_image->bits.width; j++, i++)
  1.1537 +			((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
  1.1538 +		}
  1.1539 +	    }
  1.1540 +	    else if (src_bpp == 8)
  1.1541 +	    {
  1.1542 +		uint8_t *src_line_8;
  1.1543 +
  1.1544 +		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
  1.1545 +				       src_line_8, 1);
  1.1546 +		src_line = (uint32_t*)src_line_8;
  1.1547 +
  1.1548 +		for (i = 0; i < src_width; )
  1.1549 +		{
  1.1550 +		    for (j = 0; j < src_image->bits.width; j++, i++)
  1.1551 +			((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
  1.1552 +		}
  1.1553 +	    }
  1.1554 +
  1.1555 +	    info2.src_y = 0;
  1.1556 +	}
  1.1557 +	else
  1.1558 +	{
  1.1559 +	    info2.src_y = sy;
  1.1560 +	}
  1.1561 +
  1.1562 +	width_remain = width;
  1.1563 +
  1.1564 +	while (width_remain > 0)
  1.1565 +	{
  1.1566 +	    num_pixels = src_width - sx;
  1.1567 +
  1.1568 +	    if (num_pixels > width_remain)
  1.1569 +		num_pixels = width_remain;
  1.1570 +
  1.1571 +	    info2.src_x = sx;
  1.1572 +	    info2.width = num_pixels;
  1.1573 +	    info2.height = 1;
  1.1574 +
  1.1575 +	    func (imp, &info2);
  1.1576 +
  1.1577 +	    width_remain -= num_pixels;
  1.1578 +	    info2.mask_x += num_pixels;
  1.1579 +	    info2.dest_x += num_pixels;
  1.1580 +	    sx = 0;
  1.1581 +	}
  1.1582 +
  1.1583 +	sx = src_x;
  1.1584 +	sy++;
  1.1585 +	info2.mask_x = info->mask_x;
  1.1586 +	info2.mask_y++;
  1.1587 +	info2.dest_x = info->dest_x;
  1.1588 +	info2.dest_y++;
  1.1589 +    }
  1.1590 +
  1.1591 +    if (need_src_extension)
  1.1592 +	_pixman_image_fini (&extended_src_image);
  1.1593 +}
  1.1594 +
  1.1595 +/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
  1.1596 +static force_inline void
  1.1597 +scaled_nearest_scanline_565_565_SRC (uint16_t *       dst,
  1.1598 +				     const uint16_t * src,
  1.1599 +				     int32_t          w,
  1.1600 +				     pixman_fixed_t   vx,
  1.1601 +				     pixman_fixed_t   unit_x,
  1.1602 +				     pixman_fixed_t   max_vx,
  1.1603 +				     pixman_bool_t    fully_transparent_src)
  1.1604 +{
  1.1605 +    uint16_t tmp1, tmp2, tmp3, tmp4;
  1.1606 +    while ((w -= 4) >= 0)
  1.1607 +    {
  1.1608 +	tmp1 = *(src + pixman_fixed_to_int (vx));
  1.1609 +	vx += unit_x;
  1.1610 +	tmp2 = *(src + pixman_fixed_to_int (vx));
  1.1611 +	vx += unit_x;
  1.1612 +	tmp3 = *(src + pixman_fixed_to_int (vx));
  1.1613 +	vx += unit_x;
  1.1614 +	tmp4 = *(src + pixman_fixed_to_int (vx));
  1.1615 +	vx += unit_x;
  1.1616 +	*dst++ = tmp1;
  1.1617 +	*dst++ = tmp2;
  1.1618 +	*dst++ = tmp3;
  1.1619 +	*dst++ = tmp4;
  1.1620 +    }
  1.1621 +    if (w & 2)
  1.1622 +    {
  1.1623 +	tmp1 = *(src + pixman_fixed_to_int (vx));
  1.1624 +	vx += unit_x;
  1.1625 +	tmp2 = *(src + pixman_fixed_to_int (vx));
  1.1626 +	vx += unit_x;
  1.1627 +	*dst++ = tmp1;
  1.1628 +	*dst++ = tmp2;
  1.1629 +    }
  1.1630 +    if (w & 1)
  1.1631 +	*dst = *(src + pixman_fixed_to_int (vx));
  1.1632 +}
  1.1633 +
  /*
   * Nearest-neighbour 565 -> 565 SRC main loops built around the unrolled
   * scaled_nearest_scanline_565_565_SRC worker, for the COVER, NONE and PAD
   * modes.  The two uint16_t arguments match the worker's source and
   * destination pixel types.  No NORMAL variant is instantiated here; the
   * FAST_NEAREST list earlier in the file has a 565_565_normal SRC entry.
   *
   * NOTE(review): FAST_NEAREST_MAINLOOP is defined outside this part of the
   * file -- confirm argument meaning against its definition.
   */
  1.1634 +FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
  1.1635 +		       scaled_nearest_scanline_565_565_SRC,
  1.1636 +		       uint16_t, uint16_t, COVER)
  1.1637 +FAST_NEAREST_MAINLOOP (565_565_none_SRC,
  1.1638 +		       scaled_nearest_scanline_565_565_SRC,
  1.1639 +		       uint16_t, uint16_t, NONE)
  1.1640 +FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
  1.1641 +		       scaled_nearest_scanline_565_565_SRC,
  1.1642 +		       uint16_t, uint16_t, PAD)
  1.1643 +
  1.1644 +static force_inline uint32_t
  1.1645 +fetch_nearest (pixman_repeat_t src_repeat,
  1.1646 +	       pixman_format_code_t format,
  1.1647 +	       uint32_t *src, int x, int src_width)
  1.1648 +{
  1.1649 +    if (repeat (src_repeat, &x, src_width))
  1.1650 +    {
  1.1651 +	if (format == PIXMAN_x8r8g8b8 || format == PIXMAN_x8b8g8r8)
  1.1652 +	    return *(src + x) | 0xff000000;
  1.1653 +	else
  1.1654 +	    return *(src + x);
  1.1655 +    }
  1.1656 +    else
  1.1657 +    {
  1.1658 +	return 0;
  1.1659 +    }
  1.1660 +}
  1.1661 +
  1.1662 +static force_inline void
  1.1663 +combine_over (uint32_t s, uint32_t *dst)
  1.1664 +{
  1.1665 +    if (s)
  1.1666 +    {
  1.1667 +	uint8_t ia = 0xff - (s >> 24);
  1.1668 +
  1.1669 +	if (ia)
  1.1670 +	    UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
  1.1671 +	else
  1.1672 +	    *dst = s;
  1.1673 +    }
  1.1674 +}
  1.1675 +
  1.1676 +static force_inline void
  1.1677 +combine_src (uint32_t s, uint32_t *dst)
  1.1678 +{
  1.1679 +    *dst = s;
  1.1680 +}
  1.1681 +
  1.1682 +static void
  1.1683 +fast_composite_scaled_nearest (pixman_implementation_t *imp,
  1.1684 +			       pixman_composite_info_t *info)
  1.1685 +{
  1.1686 +    PIXMAN_COMPOSITE_ARGS (info);
  1.1687 +    uint32_t       *dst_line;
  1.1688 +    uint32_t       *src_line;
  1.1689 +    int             dst_stride, src_stride;
  1.1690 +    int		    src_width, src_height;
  1.1691 +    pixman_repeat_t src_repeat;
  1.1692 +    pixman_fixed_t unit_x, unit_y;
  1.1693 +    pixman_format_code_t src_format;
  1.1694 +    pixman_vector_t v;
  1.1695 +    pixman_fixed_t vy;
  1.1696 +
  1.1697 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
  1.1698 +    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
  1.1699 +     * transformed from destination space to source space
  1.1700 +     */
  1.1701 +    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);
  1.1702 +
  1.1703 +    /* reference point is the center of the pixel */
  1.1704 +    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
  1.1705 +    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
  1.1706 +    v.vector[2] = pixman_fixed_1;
  1.1707 +
  1.1708 +    if (!pixman_transform_point_3d (src_image->common.transform, &v))
  1.1709 +	return;
  1.1710 +
  1.1711 +    unit_x = src_image->common.transform->matrix[0][0];
  1.1712 +    unit_y = src_image->common.transform->matrix[1][1];
  1.1713 +
  1.1714 +    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
  1.1715 +    v.vector[0] -= pixman_fixed_e;
  1.1716 +    v.vector[1] -= pixman_fixed_e;
  1.1717 +
  1.1718 +    src_height = src_image->bits.height;
  1.1719 +    src_width = src_image->bits.width;
  1.1720 +    src_repeat = src_image->common.repeat;
  1.1721 +    src_format = src_image->bits.format;
  1.1722 +
  1.1723 +    vy = v.vector[1];
  1.1724 +    while (height--)
  1.1725 +    {
  1.1726 +        pixman_fixed_t vx = v.vector[0];
  1.1727 +	int y = pixman_fixed_to_int (vy);
  1.1728 +	uint32_t *dst = dst_line;
  1.1729 +
  1.1730 +	dst_line += dst_stride;
  1.1731 +
  1.1732 +        /* adjust the y location by a unit vector in the y direction
  1.1733 +         * this is equivalent to transforming y+1 of the destination point to source space */
  1.1734 +        vy += unit_y;
  1.1735 +
  1.1736 +	if (!repeat (src_repeat, &y, src_height))
  1.1737 +	{
  1.1738 +	    if (op == PIXMAN_OP_SRC)
  1.1739 +		memset (dst, 0, sizeof (*dst) * width);
  1.1740 +	}
  1.1741 +	else
  1.1742 +	{
  1.1743 +	    int w = width;
  1.1744 +
  1.1745 +	    uint32_t *src = src_line + y * src_stride;
  1.1746 +
  1.1747 +	    while (w >= 2)
  1.1748 +	    {
  1.1749 +		uint32_t s1, s2;
  1.1750 +		int x1, x2;
  1.1751 +
  1.1752 +		x1 = pixman_fixed_to_int (vx);
  1.1753 +		vx += unit_x;
  1.1754 +
  1.1755 +		x2 = pixman_fixed_to_int (vx);
  1.1756 +		vx += unit_x;
  1.1757 +
  1.1758 +		w -= 2;
  1.1759 +
  1.1760 +		s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
  1.1761 +		s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);
  1.1762 +
  1.1763 +		if (op == PIXMAN_OP_OVER)
  1.1764 +		{
  1.1765 +		    combine_over (s1, dst++);
  1.1766 +		    combine_over (s2, dst++);
  1.1767 +		}
  1.1768 +		else
  1.1769 +		{
  1.1770 +		    combine_src (s1, dst++);
  1.1771 +		    combine_src (s2, dst++);
  1.1772 +		}
  1.1773 +	    }
  1.1774 +
  1.1775 +	    while (w--)
  1.1776 +	    {
  1.1777 +		uint32_t s;
  1.1778 +		int x;
  1.1779 +
  1.1780 +		x = pixman_fixed_to_int (vx);
  1.1781 +		vx += unit_x;
  1.1782 +
  1.1783 +		s = fetch_nearest (src_repeat, src_format, src, x, src_width);
  1.1784 +
  1.1785 +		if (op == PIXMAN_OP_OVER)
  1.1786 +		    combine_over (s, dst++);
  1.1787 +		else
  1.1788 +		    combine_src (s, dst++);
  1.1789 +	    }
  1.1790 +	}
  1.1791 +    }
  1.1792 +}
  1.1793 +
  1.1794 +#define CACHE_LINE_SIZE 64
  1.1795 +
  1.1796 +#define FAST_SIMPLE_ROTATE(suffix, pix_type)                                  \
  1.1797 +                                                                              \
  1.1798 +static void                                                                   \
  1.1799 +blt_rotated_90_trivial_##suffix (pix_type       *dst,                         \
  1.1800 +				 int             dst_stride,                  \
  1.1801 +				 const pix_type *src,                         \
  1.1802 +				 int             src_stride,                  \
  1.1803 +				 int             w,                           \
  1.1804 +				 int             h)                           \
  1.1805 +{                                                                             \
  1.1806 +    int x, y;                                                                 \
  1.1807 +    for (y = 0; y < h; y++)                                                   \
  1.1808 +    {                                                                         \
  1.1809 +	const pix_type *s = src + (h - y - 1);                                \
  1.1810 +	pix_type *d = dst + dst_stride * y;                                   \
  1.1811 +	for (x = 0; x < w; x++)                                               \
  1.1812 +	{                                                                     \
  1.1813 +	    *d++ = *s;                                                        \
  1.1814 +	    s += src_stride;                                                  \
  1.1815 +	}                                                                     \
  1.1816 +    }                                                                         \
  1.1817 +}                                                                             \
  1.1818 +                                                                              \
  1.1819 +static void                                                                   \
  1.1820 +blt_rotated_270_trivial_##suffix (pix_type       *dst,                        \
  1.1821 +				  int             dst_stride,                 \
  1.1822 +				  const pix_type *src,                        \
  1.1823 +				  int             src_stride,                 \
  1.1824 +				  int             w,                          \
  1.1825 +				  int             h)                          \
  1.1826 +{                                                                             \
  1.1827 +    int x, y;                                                                 \
  1.1828 +    for (y = 0; y < h; y++)                                                   \
  1.1829 +    {                                                                         \
  1.1830 +	const pix_type *s = src + src_stride * (w - 1) + y;                   \
  1.1831 +	pix_type *d = dst + dst_stride * y;                                   \
  1.1832 +	for (x = 0; x < w; x++)                                               \
  1.1833 +	{                                                                     \
  1.1834 +	    *d++ = *s;                                                        \
  1.1835 +	    s -= src_stride;                                                  \
  1.1836 +	}                                                                     \
  1.1837 +    }                                                                         \
  1.1838 +}                                                                             \
  1.1839 +                                                                              \
  1.1840 +static void                                                                   \
  1.1841 +blt_rotated_90_##suffix (pix_type       *dst,                                 \
  1.1842 +			 int             dst_stride,                          \
  1.1843 +			 const pix_type *src,                                 \
  1.1844 +			 int             src_stride,                          \
  1.1845 +			 int             W,                                   \
  1.1846 +			 int             H)                                   \
  1.1847 +{                                                                             \
  1.1848 +    int x;                                                                    \
  1.1849 +    int leading_pixels = 0, trailing_pixels = 0;                              \
  1.1850 +    const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
  1.1851 +                                                                              \
  1.1852 +    /*                                                                        \
  1.1853 +     * split processing into handling destination as TILE_SIZExH cache line   \
  1.1854 +     * aligned vertical stripes (optimistically assuming that destination     \
  1.1855 +     * stride is a multiple of cache line, if not - it will be just a bit     \
  1.1856 +     * slower)                                                                \
  1.1857 +     */                                                                       \
  1.1858 +                                                                              \
  1.1859 +    if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
  1.1860 +    {                                                                         \
  1.1861 +	leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
  1.1862 +			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
  1.1863 +	if (leading_pixels > W)                                               \
  1.1864 +	    leading_pixels = W;                                               \
  1.1865 +                                                                              \
  1.1866 +	/* unaligned leading part NxH (where N < TILE_SIZE) */                \
  1.1867 +	blt_rotated_90_trivial_##suffix (                                     \
  1.1868 +	    dst,                                                              \
  1.1869 +	    dst_stride,                                                       \
  1.1870 +	    src,                                                              \
  1.1871 +	    src_stride,                                                       \
  1.1872 +	    leading_pixels,                                                   \
  1.1873 +	    H);                                                               \
  1.1874 +	                                                                      \
  1.1875 +	dst += leading_pixels;                                                \
  1.1876 +	src += leading_pixels * src_stride;                                   \
  1.1877 +	W -= leading_pixels;                                                  \
  1.1878 +    }                                                                         \
  1.1879 +                                                                              \
  1.1880 +    if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
  1.1881 +    {                                                                         \
  1.1882 +	trailing_pixels = (((uintptr_t)(dst + W) &                            \
  1.1883 +			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
  1.1884 +	if (trailing_pixels > W)                                              \
  1.1885 +	    trailing_pixels = W;                                              \
  1.1886 +	W -= trailing_pixels;                                                 \
  1.1887 +    }                                                                         \
  1.1888 +                                                                              \
  1.1889 +    for (x = 0; x < W; x += TILE_SIZE)                                        \
  1.1890 +    {                                                                         \
  1.1891 +	/* aligned middle part TILE_SIZExH */                                 \
  1.1892 +	blt_rotated_90_trivial_##suffix (                                     \
  1.1893 +	    dst + x,                                                          \
  1.1894 +	    dst_stride,                                                       \
  1.1895 +	    src + src_stride * x,                                             \
  1.1896 +	    src_stride,                                                       \
  1.1897 +	    TILE_SIZE,                                                        \
  1.1898 +	    H);                                                               \
  1.1899 +    }                                                                         \
  1.1900 +                                                                              \
  1.1901 +    if (trailing_pixels)                                                      \
  1.1902 +    {                                                                         \
  1.1903 +	/* unaligned trailing part NxH (where N < TILE_SIZE) */               \
  1.1904 +	blt_rotated_90_trivial_##suffix (                                     \
  1.1905 +	    dst + W,                                                          \
  1.1906 +	    dst_stride,                                                       \
  1.1907 +	    src + W * src_stride,                                             \
  1.1908 +	    src_stride,                                                       \
  1.1909 +	    trailing_pixels,                                                  \
  1.1910 +	    H);                                                               \
  1.1911 +    }                                                                         \
  1.1912 +}                                                                             \
  1.1913 +                                                                              \
  1.1914 +static void                                                                   \
  1.1915 +blt_rotated_270_##suffix (pix_type       *dst,                                \
  1.1916 +			  int             dst_stride,                         \
  1.1917 +			  const pix_type *src,                                \
  1.1918 +			  int             src_stride,                         \
  1.1919 +			  int             W,                                  \
  1.1920 +			  int             H)                                  \
  1.1921 +{                                                                             \
  1.1922 +    int x;                                                                    \
  1.1923 +    int leading_pixels = 0, trailing_pixels = 0;                              \
  1.1924 +    const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
  1.1925 +                                                                              \
  1.1926 +    /*                                                                        \
  1.1927 +     * split processing into handling destination as TILE_SIZExH cache line   \
  1.1928 +     * aligned vertical stripes (optimistically assuming that destination     \
  1.1929 +     * stride is a multiple of cache line, if not - it will be just a bit     \
  1.1930 +     * slower)                                                                \
  1.1931 +     */                                                                       \
  1.1932 +                                                                              \
  1.1933 +    if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
  1.1934 +    {                                                                         \
  1.1935 +	leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
  1.1936 +			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
  1.1937 +	if (leading_pixels > W)                                               \
  1.1938 +	    leading_pixels = W;                                               \
  1.1939 +                                                                              \
  1.1940 +	/* unaligned leading part NxH (where N < TILE_SIZE) */                \
  1.1941 +	blt_rotated_270_trivial_##suffix (                                    \
  1.1942 +	    dst,                                                              \
  1.1943 +	    dst_stride,                                                       \
  1.1944 +	    src + src_stride * (W - leading_pixels),                          \
  1.1945 +	    src_stride,                                                       \
  1.1946 +	    leading_pixels,                                                   \
  1.1947 +	    H);                                                               \
  1.1948 +	                                                                      \
  1.1949 +	dst += leading_pixels;                                                \
  1.1950 +	W -= leading_pixels;                                                  \
  1.1951 +    }                                                                         \
  1.1952 +                                                                              \
  1.1953 +    if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
  1.1954 +    {                                                                         \
  1.1955 +	trailing_pixels = (((uintptr_t)(dst + W) &                            \
  1.1956 +			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
  1.1957 +	if (trailing_pixels > W)                                              \
  1.1958 +	    trailing_pixels = W;                                              \
  1.1959 +	W -= trailing_pixels;                                                 \
  1.1960 +	src += trailing_pixels * src_stride;                                  \
  1.1961 +    }                                                                         \
  1.1962 +                                                                              \
  1.1963 +    for (x = 0; x < W; x += TILE_SIZE)                                        \
  1.1964 +    {                                                                         \
  1.1965 +	/* aligned middle part TILE_SIZExH */                                 \
  1.1966 +	blt_rotated_270_trivial_##suffix (                                    \
  1.1967 +	    dst + x,                                                          \
  1.1968 +	    dst_stride,                                                       \
  1.1969 +	    src + src_stride * (W - x - TILE_SIZE),                           \
  1.1970 +	    src_stride,                                                       \
  1.1971 +	    TILE_SIZE,                                                        \
  1.1972 +	    H);                                                               \
  1.1973 +    }                                                                         \
  1.1974 +                                                                              \
  1.1975 +    if (trailing_pixels)                                                      \
  1.1976 +    {                                                                         \
  1.1977 +	/* unaligned trailing part NxH (where N < TILE_SIZE) */               \
  1.1978 +	blt_rotated_270_trivial_##suffix (                                    \
  1.1979 +	    dst + W,                                                          \
  1.1980 +	    dst_stride,                                                       \
  1.1981 +	    src - trailing_pixels * src_stride,                               \
  1.1982 +	    src_stride,                                                       \
  1.1983 +	    trailing_pixels,                                                  \
  1.1984 +	    H);                                                               \
  1.1985 +    }                                                                         \
  1.1986 +}                                                                             \
  1.1987 +                                                                              \
  1.1988 +static void                                                                   \
  1.1989 +fast_composite_rotate_90_##suffix (pixman_implementation_t *imp,              \
  1.1990 +				   pixman_composite_info_t *info)	      \
  1.1991 +{									      \
  1.1992 +    PIXMAN_COMPOSITE_ARGS (info);					      \
  1.1993 +    pix_type       *dst_line;						      \
  1.1994 +    pix_type       *src_line;                                                 \
  1.1995 +    int             dst_stride, src_stride;                                   \
  1.1996 +    int             src_x_t, src_y_t;                                         \
  1.1997 +                                                                              \
  1.1998 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
  1.1999 +			   dst_stride, dst_line, 1);                          \
  1.2000 +    src_x_t = -src_y + pixman_fixed_to_int (                                  \
  1.2001 +				src_image->common.transform->matrix[0][2] +   \
  1.2002 +				pixman_fixed_1 / 2 - pixman_fixed_e) - height;\
  1.2003 +    src_y_t = src_x + pixman_fixed_to_int (                                   \
  1.2004 +				src_image->common.transform->matrix[1][2] +   \
  1.2005 +				pixman_fixed_1 / 2 - pixman_fixed_e);         \
  1.2006 +    PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
  1.2007 +			   src_stride, src_line, 1);                          \
  1.2008 +    blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride,      \
  1.2009 +			     width, height);                                  \
  1.2010 +}                                                                             \
  1.2011 +                                                                              \
  1.2012 +static void                                                                   \
  1.2013 +fast_composite_rotate_270_##suffix (pixman_implementation_t *imp,             \
  1.2014 +				    pixman_composite_info_t *info)            \
  1.2015 +{                                                                             \
  1.2016 +    PIXMAN_COMPOSITE_ARGS (info);					      \
  1.2017 +    pix_type       *dst_line;						      \
  1.2018 +    pix_type       *src_line;                                                 \
  1.2019 +    int             dst_stride, src_stride;                                   \
  1.2020 +    int             src_x_t, src_y_t;                                         \
  1.2021 +                                                                              \
  1.2022 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
  1.2023 +			   dst_stride, dst_line, 1);                          \
  1.2024 +    src_x_t = src_y + pixman_fixed_to_int (                                   \
  1.2025 +				src_image->common.transform->matrix[0][2] +   \
  1.2026 +				pixman_fixed_1 / 2 - pixman_fixed_e);         \
  1.2027 +    src_y_t = -src_x + pixman_fixed_to_int (                                  \
  1.2028 +				src_image->common.transform->matrix[1][2] +   \
  1.2029 +				pixman_fixed_1 / 2 - pixman_fixed_e) - width; \
  1.2030 +    PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
  1.2031 +			   src_stride, src_line, 1);                          \
  1.2032 +    blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride,     \
  1.2033 +			      width, height);                                 \
  1.2034 +}
  1.2035 +
/*
 * Instantiate the rotation helpers (blt_rotated_{90,270}[_trivial]_<suffix>
 * and fast_composite_rotate_{90,270}_<suffix>) once per pixel storage type:
 * uint8_t, uint16_t and uint32_t.
 */
FAST_SIMPLE_ROTATE (8, uint8_t)
FAST_SIMPLE_ROTATE (565, uint16_t)
FAST_SIMPLE_ROTATE (8888, uint32_t)
  1.2039 +
/*
 * Table of the C fast paths provided by this file.  Each entry names an
 * operator, the source/mask/destination formats (with their required flags)
 * and the function that implements the combination.  The last entry has
 * PIXMAN_OP_NONE as its operator.
 *
 * NOTE(review): the code that searches this table is not in this part of
 * the file; entry order is presumably significant (first match wins), so
 * do not reorder entries without confirming.
 */
static const pixman_fast_path_t c_fast_paths[] =
{
    /* Paths built with the PIXMAN_STD_FAST_PATH* macros */
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5,   fast_composite_over_n_1_0565),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5,   fast_composite_over_n_1_0565),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
    PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, fast_composite_add_0565_0565),
    PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, fast_composite_add_0565_0565),
    PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
    PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
    PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
    PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1_1),
    PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
    PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),

    /* Paths built with the SIMPLE_NEAREST_FAST_PATH* macros */
    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),

    SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),

    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888),

    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),

/*
 * One table entry for operator `op` with source format `s` (requiring
 * SCALED_NEAREST_FLAGS), no mask, and destination format `d`; every such
 * entry dispatches to fast_composite_scaled_nearest.
 */
#define NEAREST_FAST_PATH(op,s,d)		\
    {   PIXMAN_OP_ ## op,			\
	PIXMAN_ ## s, SCALED_NEAREST_FLAGS,	\
	PIXMAN_null, 0,				\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,	\
	fast_composite_scaled_nearest,		\
    }

    NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
    NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),

    NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
    NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),

    NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
    NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),

    NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
    NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),

/*
 * Source flags required by the rotation fast paths: the ROTATE_<angle>
 * transform flag, the nearest filter flag, the
 * FAST_PATH_SAMPLES_COVER_CLIP_NEAREST flag, and the standard flags.
 */
#define SIMPLE_ROTATE_FLAGS(angle)					  \
    (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM	|			  \
     FAST_PATH_NEAREST_FILTER			|			  \
     FAST_PATH_SAMPLES_COVER_CLIP_NEAREST	|			  \
     FAST_PATH_STANDARD_FLAGS)

/*
 * Expands to TWO table entries for the same op/source/destination: one
 * for the 90 degree flags dispatching to fast_composite_rotate_90_<suffix>
 * and one for the 270 degree flags dispatching to
 * fast_composite_rotate_270_<suffix>.
 */
#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix)				  \
    {   PIXMAN_OP_ ## op,						  \
	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90),				  \
	PIXMAN_null, 0,							  \
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				  \
	fast_composite_rotate_90_##suffix,				  \
    },									  \
    {   PIXMAN_OP_ ## op,						  \
	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270),			  \
	PIXMAN_null, 0,							  \
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				  \
	fast_composite_rotate_270_##suffix,				  \
    }

    SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
    SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),

    /* Simple repeat fast path entry. */
    {	PIXMAN_OP_any,
	PIXMAN_any,
	(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE |
	 FAST_PATH_NORMAL_REPEAT),
	PIXMAN_any, 0,
	PIXMAN_any, FAST_PATH_STD_DEST_FLAGS,
	fast_composite_tiled_repeat
    },

    /* Paths built with the SIMPLE_BILINEAR_FAST_PATH macro */
    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),

    /* Final entry; presumably the end-of-table sentinel (confirm in lookup code) */
    {   PIXMAN_OP_NONE	},
};
  1.2205 +
/*
 * A1_FILL_MASK(n, offs): mask with n consecutive bits set, covering the n
 * pixels that start at pixel offset offs inside a 32-bit word.  When
 * WORDS_BIGENDIAN is defined pixel 0 is the most significant bit of the
 * word, otherwise it is the least significant bit.
 *
 * The mask is built from (1U << n), so n is expected to be smaller than the
 * width of unsigned int (the callers in this file pass values below 32).
 */
#ifdef WORDS_BIGENDIAN
#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n)))
#else
#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs))
#endif
  1.2211 +
  1.2212 +static force_inline void
  1.2213 +pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
  1.2214 +{
  1.2215 +    if (offs)
  1.2216 +    {
  1.2217 +	int leading_pixels = 32 - offs;
  1.2218 +	if (leading_pixels >= width)
  1.2219 +	{
  1.2220 +	    if (v)
  1.2221 +		*dst |= A1_FILL_MASK (width, offs);
  1.2222 +	    else
  1.2223 +		*dst &= ~A1_FILL_MASK (width, offs);
  1.2224 +	    return;
  1.2225 +	}
  1.2226 +	else
  1.2227 +	{
  1.2228 +	    if (v)
  1.2229 +		*dst++ |= A1_FILL_MASK (leading_pixels, offs);
  1.2230 +	    else
  1.2231 +		*dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
  1.2232 +	    width -= leading_pixels;
  1.2233 +	}
  1.2234 +    }
  1.2235 +    while (width >= 32)
  1.2236 +    {
  1.2237 +	if (v)
  1.2238 +	    *dst++ = 0xFFFFFFFF;
  1.2239 +	else
  1.2240 +	    *dst++ = 0;
  1.2241 +	width -= 32;
  1.2242 +    }
  1.2243 +    if (width > 0)
  1.2244 +    {
  1.2245 +	if (v)
  1.2246 +	    *dst |= A1_FILL_MASK (width, 0);
  1.2247 +	else
  1.2248 +	    *dst &= ~A1_FILL_MASK (width, 0);
  1.2249 +    }
  1.2250 +}
  1.2251 +
  1.2252 +static void
  1.2253 +pixman_fill1 (uint32_t *bits,
  1.2254 +              int       stride,
  1.2255 +              int       x,
  1.2256 +              int       y,
  1.2257 +              int       width,
  1.2258 +              int       height,
  1.2259 +              uint32_t  filler)
  1.2260 +{
  1.2261 +    uint32_t *dst = bits + y * stride + (x >> 5);
  1.2262 +    int offs = x & 31;
  1.2263 +
  1.2264 +    if (filler & 1)
  1.2265 +    {
  1.2266 +	while (height--)
  1.2267 +	{
  1.2268 +	    pixman_fill1_line (dst, offs, width, 1);
  1.2269 +	    dst += stride;
  1.2270 +	}
  1.2271 +    }
  1.2272 +    else
  1.2273 +    {
  1.2274 +	while (height--)
  1.2275 +	{
  1.2276 +	    pixman_fill1_line (dst, offs, width, 0);
  1.2277 +	    dst += stride;
  1.2278 +	}
  1.2279 +    }
  1.2280 +}
  1.2281 +
  1.2282 +static void
  1.2283 +pixman_fill8 (uint32_t *bits,
  1.2284 +              int       stride,
  1.2285 +              int       x,
  1.2286 +              int       y,
  1.2287 +              int       width,
  1.2288 +              int       height,
  1.2289 +              uint32_t  filler)
  1.2290 +{
  1.2291 +    int byte_stride = stride * (int) sizeof (uint32_t);
  1.2292 +    uint8_t *dst = (uint8_t *) bits;
  1.2293 +    uint8_t v = filler & 0xff;
  1.2294 +    int i;
  1.2295 +
  1.2296 +    dst = dst + y * byte_stride + x;
  1.2297 +
  1.2298 +    while (height--)
  1.2299 +    {
  1.2300 +	for (i = 0; i < width; ++i)
  1.2301 +	    dst[i] = v;
  1.2302 +
  1.2303 +	dst += byte_stride;
  1.2304 +    }
  1.2305 +}
  1.2306 +
  1.2307 +static void
  1.2308 +pixman_fill16 (uint32_t *bits,
  1.2309 +               int       stride,
  1.2310 +               int       x,
  1.2311 +               int       y,
  1.2312 +               int       width,
  1.2313 +               int       height,
  1.2314 +               uint32_t  filler)
  1.2315 +{
  1.2316 +    int short_stride =
  1.2317 +	(stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
  1.2318 +    uint16_t *dst = (uint16_t *)bits;
  1.2319 +    uint16_t v = filler & 0xffff;
  1.2320 +    int i;
  1.2321 +
  1.2322 +    dst = dst + y * short_stride + x;
  1.2323 +
  1.2324 +    while (height--)
  1.2325 +    {
  1.2326 +	for (i = 0; i < width; ++i)
  1.2327 +	    dst[i] = v;
  1.2328 +
  1.2329 +	dst += short_stride;
  1.2330 +    }
  1.2331 +}
  1.2332 +
  1.2333 +static void
  1.2334 +pixman_fill32 (uint32_t *bits,
  1.2335 +               int       stride,
  1.2336 +               int       x,
  1.2337 +               int       y,
  1.2338 +               int       width,
  1.2339 +               int       height,
  1.2340 +               uint32_t  filler)
  1.2341 +{
  1.2342 +    int i;
  1.2343 +
  1.2344 +    bits = bits + y * stride + x;
  1.2345 +
  1.2346 +    while (height--)
  1.2347 +    {
  1.2348 +	for (i = 0; i < width; ++i)
  1.2349 +	    bits[i] = filler;
  1.2350 +
  1.2351 +	bits += stride;
  1.2352 +    }
  1.2353 +}
  1.2354 +
  1.2355 +static pixman_bool_t
  1.2356 +fast_path_fill (pixman_implementation_t *imp,
  1.2357 +                uint32_t *               bits,
  1.2358 +                int                      stride,
  1.2359 +                int                      bpp,
  1.2360 +                int                      x,
  1.2361 +                int                      y,
  1.2362 +                int                      width,
  1.2363 +                int                      height,
  1.2364 +                uint32_t		 filler)
  1.2365 +{
  1.2366 +    switch (bpp)
  1.2367 +    {
  1.2368 +    case 1:
  1.2369 +	pixman_fill1 (bits, stride, x, y, width, height, filler);
  1.2370 +	break;
  1.2371 +
  1.2372 +    case 8:
  1.2373 +	pixman_fill8 (bits, stride, x, y, width, height, filler);
  1.2374 +	break;
  1.2375 +
  1.2376 +    case 16:
  1.2377 +	pixman_fill16 (bits, stride, x, y, width, height, filler);
  1.2378 +	break;
  1.2379 +
  1.2380 +    case 32:
  1.2381 +	pixman_fill32 (bits, stride, x, y, width, height, filler);
  1.2382 +	break;
  1.2383 +
  1.2384 +    default:
  1.2385 +	return FALSE;
  1.2386 +    }
  1.2387 +
  1.2388 +    return TRUE;
  1.2389 +}
  1.2390 +
  1.2391 +/*****************************************************************************/
  1.2392 +
  1.2393 +static uint32_t *
  1.2394 +fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
  1.2395 +{
  1.2396 +    int32_t w = iter->width;
  1.2397 +    uint32_t *dst = iter->buffer;
  1.2398 +    const uint16_t *src = (const uint16_t *)iter->bits;
  1.2399 +
  1.2400 +    iter->bits += iter->stride;
  1.2401 +
  1.2402 +    /* Align the source buffer at 4 bytes boundary */
  1.2403 +    if (w > 0 && ((uintptr_t)src & 3))
  1.2404 +    {
  1.2405 +	*dst++ = convert_0565_to_8888 (*src++);
  1.2406 +	w--;
  1.2407 +    }
  1.2408 +    /* Process two pixels per iteration */
  1.2409 +    while ((w -= 2) >= 0)
  1.2410 +    {
  1.2411 +	uint32_t sr, sb, sg, t0, t1;
  1.2412 +	uint32_t s = *(const uint32_t *)src;
  1.2413 +	src += 2;
  1.2414 +	sr = (s >> 8) & 0x00F800F8;
  1.2415 +	sb = (s << 3) & 0x00F800F8;
  1.2416 +	sg = (s >> 3) & 0x00FC00FC;
  1.2417 +	sr |= sr >> 5;
  1.2418 +	sb |= sb >> 5;
  1.2419 +	sg |= sg >> 6;
  1.2420 +	t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) |
  1.2421 +	     (sb & 0xFF) | 0xFF000000;
  1.2422 +	t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) |
  1.2423 +	     (sb >> 16) | 0xFF000000;
  1.2424 +#ifdef WORDS_BIGENDIAN
  1.2425 +	*dst++ = t1;
  1.2426 +	*dst++ = t0;
  1.2427 +#else
  1.2428 +	*dst++ = t0;
  1.2429 +	*dst++ = t1;
  1.2430 +#endif
  1.2431 +    }
  1.2432 +    if (w & 1)
  1.2433 +    {
  1.2434 +	*dst = convert_0565_to_8888 (*src);
  1.2435 +    }
  1.2436 +
  1.2437 +    return iter->buffer;
  1.2438 +}
  1.2439 +
  1.2440 +static uint32_t *
  1.2441 +fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask)
  1.2442 +{
  1.2443 +    iter->bits += iter->stride;
  1.2444 +    return iter->buffer;
  1.2445 +}
  1.2446 +
/* Helper function for a workaround, which tries to ensure that 0x1F001F
 * constant is always allocated in a register on RISC architectures.
 *
 * s       - 32-bit source pixel.  Only bits 19-23, 10-15 and 3-7 of it
 *           contribute to the result (the top 5, 6 and 5 bits of its three
 *           low bytes).
 * x1F001F - mask applied to (s >> 3).  fast_write_back_r5g6b5() passes a
 *           value loaded from a volatile that is initialised to 0x1F001F.
 *
 * Returns the packed value.  With x1F001F == 0x1F001F the low 16 bits hold
 * the 5-6-5 result; bits 16-20 still carry a copy of the top 5-bit field,
 * so the result has to be truncated to 16 bits by the caller
 * (fast_write_back_r5g6b5() stores it into a uint16_t).
 */
static force_inline uint32_t
convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F)
{
    uint32_t a, b;
    /* Bits 19-23 of s land in bits 16-20, bits 3-7 of s in bits 0-4. */
    a = (s >> 3) & x1F001F;
    /* Top six bits of the second byte of s (bits 10-15). */
    b = s & 0xFC00;
    /* Move the field at bits 16-20 down to bits 11-15. */
    a |= a >> 5;
    /* Drop the 6-bit field into bits 5-10. */
    a |= b >> 5;
    return a;
}
  1.2460 +
/*
 * Write-back hook for r5g6b5 destinations: converts iter->width 32-bit
 * pixels from iter->buffer to 16-bit pixels and stores them one row before
 * the current iter->bits position.  The scanline fetchers in this file
 * advance iter->bits by iter->stride, so that address is the row that was
 * fetched last.
 */
static void
fast_write_back_r5g6b5 (pixman_iter_t *iter)
{
    int32_t w = iter->width;
    /* Step back one stride: the fetch already moved iter->bits forward. */
    uint16_t *dst = (uint16_t *)(iter->bits - iter->stride);
    const uint32_t *src = iter->buffer;
    /* Workaround to ensure that x1F001F variable is allocated in a register */
    static volatile uint32_t volatile_x1F001F = 0x1F001F;
    uint32_t x1F001F = volatile_x1F001F;

    /* Main loop: four pixels per iteration, all loads before all stores. */
    while ((w -= 4) >= 0)
    {
	uint32_t s1 = *src++;
	uint32_t s2 = *src++;
	uint32_t s3 = *src++;
	uint32_t s4 = *src++;
	*dst++ = convert_8888_to_0565_workaround (s1, x1F001F);
	*dst++ = convert_8888_to_0565_workaround (s2, x1F001F);
	*dst++ = convert_8888_to_0565_workaround (s3, x1F001F);
	*dst++ = convert_8888_to_0565_workaround (s4, x1F001F);
    }
    /* w is now negative, but subtracting multiples of 4 left its two low
     * bits unchanged, so they still encode the 0-3 pixels that remain.
     */
    if (w & 2)
    {
	*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
	*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
    }
    if (w & 1)
    {
	*dst = convert_8888_to_0565_workaround (*src, x1F001F);
    }
}
  1.2492 +
/* One row of the format -> iterator callback table that
 * fast_src_iter_init() and fast_dest_iter_init() search.
 * Field order matters: the table below uses positional initializers.
 */
typedef struct
{
    /* Compared against image->common.extended_format_code. */
    pixman_format_code_t	format;
    /* Installed as iter->get_scanline on a match. */
    pixman_iter_get_scanline_t	get_scanline;
    /* Installed as iter->write_back, by fast_dest_iter_init() only. */
    pixman_iter_write_back_t	write_back;
} fetcher_info_t;
  1.2499 +
/* Formats the iterators in this file can handle.  The lookup loops walk the
 * array until they reach the entry whose format is PIXMAN_null, so that
 * entry must stay last.
 */
static const fetcher_info_t fetchers[] =
{
    { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
    /* Terminator; its callback fields are left zero-initialized. */
    { PIXMAN_null }
};
  1.2505 +
  1.2506 +static pixman_bool_t
  1.2507 +fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
  1.2508 +{
  1.2509 +    pixman_image_t *image = iter->image;
  1.2510 +
  1.2511 +#define FLAGS								\
  1.2512 +    (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |		\
  1.2513 +     FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
  1.2514 +
  1.2515 +    if (iter->iter_flags & ITER_16)
  1.2516 +	    return FALSE;
  1.2517 +
  1.2518 +    if ((iter->iter_flags & ITER_NARROW)			&&
  1.2519 +	(iter->image_flags & FLAGS) == FLAGS)
  1.2520 +    {
  1.2521 +	const fetcher_info_t *f;
  1.2522 +
  1.2523 +	for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
  1.2524 +	{
  1.2525 +	    if (image->common.extended_format_code == f->format)
  1.2526 +	    {
  1.2527 +		uint8_t *b = (uint8_t *)image->bits.bits;
  1.2528 +		int s = image->bits.rowstride * 4;
  1.2529 +
  1.2530 +		iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
  1.2531 +		iter->stride = s;
  1.2532 +
  1.2533 +		iter->get_scanline = f->get_scanline;
  1.2534 +		return TRUE;
  1.2535 +	    }
  1.2536 +	}
  1.2537 +    }
  1.2538 +
  1.2539 +    return FALSE;
  1.2540 +}
  1.2541 +
  1.2542 +static pixman_bool_t
  1.2543 +fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
  1.2544 +{
  1.2545 +    pixman_image_t *image = iter->image;
  1.2546 +
  1.2547 +    if (iter->iter_flags & ITER_16)
  1.2548 +	    return FALSE;
  1.2549 +
  1.2550 +    if ((iter->iter_flags & ITER_NARROW)		&&
  1.2551 +	(iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS)
  1.2552 +    {
  1.2553 +	const fetcher_info_t *f;
  1.2554 +
  1.2555 +	for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
  1.2556 +	{
  1.2557 +	    if (image->common.extended_format_code == f->format)
  1.2558 +	    {
  1.2559 +		uint8_t *b = (uint8_t *)image->bits.bits;
  1.2560 +		int s = image->bits.rowstride * 4;
  1.2561 +
  1.2562 +		iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
  1.2563 +		iter->stride = s;
  1.2564 +
  1.2565 +		if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
  1.2566 +		    (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
  1.2567 +		{
  1.2568 +		    iter->get_scanline = fast_dest_fetch_noop;
  1.2569 +		}
  1.2570 +		else
  1.2571 +		{
  1.2572 +		    iter->get_scanline = f->get_scanline;
  1.2573 +		}
  1.2574 +		iter->write_back = f->write_back;
  1.2575 +		return TRUE;
  1.2576 +	    }
  1.2577 +	}
  1.2578 +    }
  1.2579 +    return FALSE;
  1.2580 +}
  1.2581 +
  1.2582 +
  1.2583 +pixman_implementation_t *
  1.2584 +_pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
  1.2585 +{
  1.2586 +    pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
  1.2587 +
  1.2588 +    imp->fill = fast_path_fill;
  1.2589 +    imp->src_iter_init = fast_src_iter_init;
  1.2590 +    imp->dest_iter_init = fast_dest_iter_init;
  1.2591 +
  1.2592 +    return imp;
  1.2593 +}

mercurial