gfx/cairo/libpixman/src/pixman-fast-path.c

author      Michael Schloh von Bennewitz <michael@schloh.com>
date        Wed, 31 Dec 2014 06:09:35 +0100
changeset   0:6474c204b198
permissions -rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

     1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
     2 /*
     3  * Copyright © 2000 SuSE, Inc.
     4  * Copyright © 2007 Red Hat, Inc.
     5  *
     6  * Permission to use, copy, modify, distribute, and sell this software and its
     7  * documentation for any purpose is hereby granted without fee, provided that
     8  * the above copyright notice appear in all copies and that both that
     9  * copyright notice and this permission notice appear in supporting
    10  * documentation, and that the name of SuSE not be used in advertising or
    11  * publicity pertaining to distribution of the software without specific,
    12  * written prior permission.  SuSE makes no representations about the
    13  * suitability of this software for any purpose.  It is provided "as is"
    14  * without express or implied warranty.
    15  *
    16  * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
    17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
    18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
    20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
    21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
    22  *
    23  * Author:  Keith Packard, SuSE, Inc.
    24  */
    26 #ifdef HAVE_CONFIG_H
    27 #include <config.h>
    28 #endif
    29 #include <string.h>
    30 #include <stdlib.h>
    31 #include "pixman-private.h"
    32 #include "pixman-combine32.h"
    33 #include "pixman-inlines.h"
    35 static force_inline uint32_t
    36 fetch_24 (uint8_t *a)
    37 {
    38     if (((uintptr_t)a) & 1)
    39     {
    40 #ifdef WORDS_BIGENDIAN
    41 	return (*a << 16) | (*(uint16_t *)(a + 1));
    42 #else
    43 	return *a | (*(uint16_t *)(a + 1) << 8);
    44 #endif
    45     }
    46     else
    47     {
    48 #ifdef WORDS_BIGENDIAN
    49 	return (*(uint16_t *)a << 8) | *(a + 2);
    50 #else
    51 	return *(uint16_t *)a | (*(a + 2) << 16);
    52 #endif
    53     }
    54 }
    56 static force_inline void
    57 store_24 (uint8_t *a,
    58           uint32_t v)
    59 {
    60     if (((uintptr_t)a) & 1)
    61     {
    62 #ifdef WORDS_BIGENDIAN
    63 	*a = (uint8_t) (v >> 16);
    64 	*(uint16_t *)(a + 1) = (uint16_t) (v);
    65 #else
    66 	*a = (uint8_t) (v);
    67 	*(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
    68 #endif
    69     }
    70     else
    71     {
    72 #ifdef WORDS_BIGENDIAN
    73 	*(uint16_t *)a = (uint16_t)(v >> 8);
    74 	*(a + 2) = (uint8_t)v;
    75 #else
    76 	*(uint16_t *)a = (uint16_t)v;
    77 	*(a + 2) = (uint8_t)(v >> 16);
    78 #endif
    79     }
    80 }
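/*
 * Note on fetch_24/store_24 above: a 24bpp pixel has no natural alignment,
 * so each access is split into an 8-bit and a 16-bit piece, with the split
 * chosen from the pointer's low bit so that the 16-bit half is always
 * halfword aligned.  The WORDS_BIGENDIAN branches keep the in-memory byte
 * order the same as a plain three-byte copy would give on either endianness.
 */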
    82 static force_inline uint32_t
    83 over (uint32_t src,
    84       uint32_t dest)
    85 {
    86     uint32_t a = ~src >> 24;
    88     UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);
    90     return dest;
    91 }
    93 static force_inline uint32_t
    94 in (uint32_t x,
    95     uint8_t  y)
    96 {
    97     uint16_t a = y;
    99     UN8x4_MUL_UN8 (x, a);
   101     return x;
   102 }
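/*
 * In premultiplied-alpha terms the two helpers above compute, per channel:
 *
 *   over (src, dest) = src + (1 - alpha (src)) * dest
 *   in (x, y)        = x * y / 255
 *
 * The UN8x4_* macros from pixman-combine32.h process all four 8-bit
 * channels of a 32-bit pixel in parallel, using the usual rounded
 * divide-by-255 trick.
 */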
   104 /*
   105  * Naming convention:
   106  *
   107  *  op_src_mask_dest
   108  */
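/*
 * For example, fast_composite_over_n_8_8888 below is OVER with a solid
 * ("n") source, an a8 mask and a 32-bit 8888 destination; a "_ca" suffix
 * means the mask is applied per component (component alpha) rather than as
 * a single alpha value.
 */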
   109 static void
   110 fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
   111                                  pixman_composite_info_t *info)
   112 {
   113     PIXMAN_COMPOSITE_ARGS (info);
   114     uint32_t    *src, *src_line;
   115     uint32_t    *dst, *dst_line;
   116     uint8_t     *mask, *mask_line;
   117     int src_stride, mask_stride, dst_stride;
   118     uint8_t m;
   119     uint32_t s, d;
   120     int32_t w;
   122     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
   123     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
   124     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
   126     while (height--)
   127     {
   128 	src = src_line;
   129 	src_line += src_stride;
   130 	dst = dst_line;
   131 	dst_line += dst_stride;
   132 	mask = mask_line;
   133 	mask_line += mask_stride;
   135 	w = width;
   136 	while (w--)
   137 	{
   138 	    m = *mask++;
   139 	    if (m)
   140 	    {
   141 		s = *src | 0xff000000;
   143 		if (m == 0xff)
   144 		{
   145 		    *dst = s;
   146 		}
   147 		else
   148 		{
   149 		    d = in (s, m);
   150 		    *dst = over (d, *dst);
   151 		}
   152 	    }
   153 	    src++;
   154 	    dst++;
   155 	}
   156     }
   157 }
   159 static void
   160 fast_composite_in_n_8_8 (pixman_implementation_t *imp,
   161                          pixman_composite_info_t *info)
   162 {
   163     PIXMAN_COMPOSITE_ARGS (info);
   164     uint32_t src, srca;
   165     uint8_t     *dst_line, *dst;
   166     uint8_t     *mask_line, *mask, m;
   167     int dst_stride, mask_stride;
   168     int32_t w;
   169     uint16_t t;
   171     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
   173     srca = src >> 24;
   175     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
   176     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
   178     if (srca == 0xff)
   179     {
   180 	while (height--)
   181 	{
   182 	    dst = dst_line;
   183 	    dst_line += dst_stride;
   184 	    mask = mask_line;
   185 	    mask_line += mask_stride;
   186 	    w = width;
   188 	    while (w--)
   189 	    {
   190 		m = *mask++;
   192 		if (m == 0)
   193 		    *dst = 0;
   194 		else if (m != 0xff)
   195 		    *dst = MUL_UN8 (m, *dst, t);
   197 		dst++;
   198 	    }
   199 	}
   200     }
   201     else
   202     {
   203 	while (height--)
   204 	{
   205 	    dst = dst_line;
   206 	    dst_line += dst_stride;
   207 	    mask = mask_line;
   208 	    mask_line += mask_stride;
   209 	    w = width;
   211 	    while (w--)
   212 	    {
   213 		m = *mask++;
   214 		m = MUL_UN8 (m, srca, t);
   216 		if (m == 0)
   217 		    *dst = 0;
   218 		else if (m != 0xff)
   219 		    *dst = MUL_UN8 (m, *dst, t);
   221 		dst++;
   222 	    }
   223 	}
   224     }
   225 }
   227 static void
   228 fast_composite_in_8_8 (pixman_implementation_t *imp,
   229                        pixman_composite_info_t *info)
   230 {
   231     PIXMAN_COMPOSITE_ARGS (info);
   232     uint8_t     *dst_line, *dst;
   233     uint8_t     *src_line, *src;
   234     int dst_stride, src_stride;
   235     int32_t w;
   236     uint8_t s;
   237     uint16_t t;
   239     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
   240     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
   242     while (height--)
   243     {
   244 	dst = dst_line;
   245 	dst_line += dst_stride;
   246 	src = src_line;
   247 	src_line += src_stride;
   248 	w = width;
   250 	while (w--)
   251 	{
   252 	    s = *src++;
   254 	    if (s == 0)
   255 		*dst = 0;
   256 	    else if (s != 0xff)
   257 		*dst = MUL_UN8 (s, *dst, t);
   259 	    dst++;
   260 	}
   261     }
   262 }
   264 static void
   265 fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
   266                               pixman_composite_info_t *info)
   267 {
   268     PIXMAN_COMPOSITE_ARGS (info);
   269     uint32_t src, srca;
   270     uint32_t    *dst_line, *dst, d;
   271     uint8_t     *mask_line, *mask, m;
   272     int dst_stride, mask_stride;
   273     int32_t w;
   275     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
   277     srca = src >> 24;
   278     if (src == 0)
   279 	return;
   281     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
   282     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
   284     while (height--)
   285     {
   286 	dst = dst_line;
   287 	dst_line += dst_stride;
   288 	mask = mask_line;
   289 	mask_line += mask_stride;
   290 	w = width;
   292 	while (w--)
   293 	{
   294 	    m = *mask++;
   295 	    if (m == 0xff)
   296 	    {
   297 		if (srca == 0xff)
   298 		    *dst = src;
   299 		else
   300 		    *dst = over (src, *dst);
   301 	    }
   302 	    else if (m)
   303 	    {
   304 		d = in (src, m);
   305 		*dst = over (d, *dst);
   306 	    }
   307 	    dst++;
   308 	}
   309     }
   310 }
   312 static void
   313 fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
   314 				   pixman_composite_info_t *info)
   315 {
   316     PIXMAN_COMPOSITE_ARGS (info);
   317     uint32_t src, s;
   318     uint32_t    *dst_line, *dst, d;
   319     uint32_t    *mask_line, *mask, ma;
   320     int dst_stride, mask_stride;
   321     int32_t w;
   323     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
   325     if (src == 0)
   326 	return;
   328     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
   329     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
   331     while (height--)
   332     {
   333 	dst = dst_line;
   334 	dst_line += dst_stride;
   335 	mask = mask_line;
   336 	mask_line += mask_stride;
   337 	w = width;
   339 	while (w--)
   340 	{
   341 	    ma = *mask++;
   343 	    if (ma)
   344 	    {
   345 		d = *dst;
   346 		s = src;
   348 		UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);
   350 		*dst = s;
   351 	    }
   353 	    dst++;
   354 	}
   355     }
   356 }
   358 static void
   359 fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
   360                                     pixman_composite_info_t *info)
   361 {
   362     PIXMAN_COMPOSITE_ARGS (info);
   363     uint32_t src, srca, s;
   364     uint32_t    *dst_line, *dst, d;
   365     uint32_t    *mask_line, *mask, ma;
   366     int dst_stride, mask_stride;
   367     int32_t w;
   369     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
   371     srca = src >> 24;
   372     if (src == 0)
   373 	return;
   375     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
   376     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
   378     while (height--)
   379     {
   380 	dst = dst_line;
   381 	dst_line += dst_stride;
   382 	mask = mask_line;
   383 	mask_line += mask_stride;
   384 	w = width;
   386 	while (w--)
   387 	{
   388 	    ma = *mask++;
   389 	    if (ma == 0xffffffff)
   390 	    {
   391 		if (srca == 0xff)
   392 		    *dst = src;
   393 		else
   394 		    *dst = over (src, *dst);
   395 	    }
   396 	    else if (ma)
   397 	    {
   398 		d = *dst;
   399 		s = src;
   401 		UN8x4_MUL_UN8x4 (s, ma);
   402 		UN8x4_MUL_UN8 (ma, srca);
   403 		ma = ~ma;
   404 		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
   406 		*dst = d;
   407 	    }
   409 	    dst++;
   410 	}
   411     }
   412 }
   414 static void
   415 fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
   416                               pixman_composite_info_t *info)
   417 {
   418     PIXMAN_COMPOSITE_ARGS (info);
   419     uint32_t src, srca;
   420     uint8_t     *dst_line, *dst;
   421     uint32_t d;
   422     uint8_t     *mask_line, *mask, m;
   423     int dst_stride, mask_stride;
   424     int32_t w;
   426     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
   428     srca = src >> 24;
   429     if (src == 0)
   430 	return;
   432     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
   433     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
   435     while (height--)
   436     {
   437 	dst = dst_line;
   438 	dst_line += dst_stride;
   439 	mask = mask_line;
   440 	mask_line += mask_stride;
   441 	w = width;
   443 	while (w--)
   444 	{
   445 	    m = *mask++;
   446 	    if (m == 0xff)
   447 	    {
   448 		if (srca == 0xff)
   449 		{
   450 		    d = src;
   451 		}
   452 		else
   453 		{
   454 		    d = fetch_24 (dst);
   455 		    d = over (src, d);
   456 		}
   457 		store_24 (dst, d);
   458 	    }
   459 	    else if (m)
   460 	    {
   461 		d = over (in (src, m), fetch_24 (dst));
   462 		store_24 (dst, d);
   463 	    }
   464 	    dst += 3;
   465 	}
   466     }
   467 }
   469 static void
   470 fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
   471                               pixman_composite_info_t *info)
   472 {
   473     PIXMAN_COMPOSITE_ARGS (info);
   474     uint32_t src, srca;
   475     uint16_t    *dst_line, *dst;
   476     uint32_t d;
   477     uint8_t     *mask_line, *mask, m;
   478     int dst_stride, mask_stride;
   479     int32_t w;
   481     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
   483     srca = src >> 24;
   484     if (src == 0)
   485 	return;
   487     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
   488     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
   490     while (height--)
   491     {
   492 	dst = dst_line;
   493 	dst_line += dst_stride;
   494 	mask = mask_line;
   495 	mask_line += mask_stride;
   496 	w = width;
   498 	while (w--)
   499 	{
   500 	    m = *mask++;
   501 	    if (m == 0xff)
   502 	    {
   503 		if (srca == 0xff)
   504 		{
   505 		    d = src;
   506 		}
   507 		else
   508 		{
   509 		    d = *dst;
   510 		    d = over (src, convert_0565_to_0888 (d));
   511 		}
   512 		*dst = convert_8888_to_0565 (d);
   513 	    }
   514 	    else if (m)
   515 	    {
   516 		d = *dst;
   517 		d = over (in (src, m), convert_0565_to_0888 (d));
   518 		*dst = convert_8888_to_0565 (d);
   519 	    }
   520 	    dst++;
   521 	}
   522     }
   523 }
   525 static void
   526 fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
   527                                     pixman_composite_info_t *info)
   528 {
   529     PIXMAN_COMPOSITE_ARGS (info);
   530     uint32_t  src, srca, s;
   531     uint16_t  src16;
   532     uint16_t *dst_line, *dst;
   533     uint32_t  d;
   534     uint32_t *mask_line, *mask, ma;
   535     int dst_stride, mask_stride;
   536     int32_t w;
   538     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
   540     srca = src >> 24;
   541     if (src == 0)
   542 	return;
   544     src16 = convert_8888_to_0565 (src);
   546     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
   547     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
   549     while (height--)
   550     {
   551 	dst = dst_line;
   552 	dst_line += dst_stride;
   553 	mask = mask_line;
   554 	mask_line += mask_stride;
   555 	w = width;
   557 	while (w--)
   558 	{
   559 	    ma = *mask++;
   560 	    if (ma == 0xffffffff)
   561 	    {
   562 		if (srca == 0xff)
   563 		{
   564 		    *dst = src16;
   565 		}
   566 		else
   567 		{
   568 		    d = *dst;
   569 		    d = over (src, convert_0565_to_0888 (d));
   570 		    *dst = convert_8888_to_0565 (d);
   571 		}
   572 	    }
   573 	    else if (ma)
   574 	    {
   575 		d = *dst;
   576 		d = convert_0565_to_0888 (d);
   578 		s = src;
   580 		UN8x4_MUL_UN8x4 (s, ma);
   581 		UN8x4_MUL_UN8 (ma, srca);
   582 		ma = ~ma;
   583 		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
   585 		*dst = convert_8888_to_0565 (d);
   586 	    }
   587 	    dst++;
   588 	}
   589     }
   590 }
   592 static void
   593 fast_composite_over_8888_8888 (pixman_implementation_t *imp,
   594                                pixman_composite_info_t *info)
   595 {
   596     PIXMAN_COMPOSITE_ARGS (info);
   597     uint32_t    *dst_line, *dst;
   598     uint32_t    *src_line, *src, s;
   599     int dst_stride, src_stride;
   600     uint8_t a;
   601     int32_t w;
   603     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
   604     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
   606     while (height--)
   607     {
   608 	dst = dst_line;
   609 	dst_line += dst_stride;
   610 	src = src_line;
   611 	src_line += src_stride;
   612 	w = width;
   614 	while (w--)
   615 	{
   616 	    s = *src++;
   617 	    a = s >> 24;
   618 	    if (a == 0xff)
   619 		*dst = s;
   620 	    else if (s)
   621 		*dst = over (s, *dst);
   622 	    dst++;
   623 	}
   624     }
   625 }
   627 static void
   628 fast_composite_src_x888_8888 (pixman_implementation_t *imp,
   629 			      pixman_composite_info_t *info)
   630 {
   631     PIXMAN_COMPOSITE_ARGS (info);
   632     uint32_t    *dst_line, *dst;
   633     uint32_t    *src_line, *src;
   634     int dst_stride, src_stride;
   635     int32_t w;
   637     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
   638     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
   640     while (height--)
   641     {
   642 	dst = dst_line;
   643 	dst_line += dst_stride;
   644 	src = src_line;
   645 	src_line += src_stride;
   646 	w = width;
   648 	while (w--)
   649 	    *dst++ = (*src++) | 0xff000000;
   650     }
   651 }
   653 #if 0
   654 static void
   655 fast_composite_over_8888_0888 (pixman_implementation_t *imp,
   656 			       pixman_composite_info_t *info)
   657 {
   658     PIXMAN_COMPOSITE_ARGS (info);
   659     uint8_t     *dst_line, *dst;
   660     uint32_t d;
   661     uint32_t    *src_line, *src, s;
   662     uint8_t a;
   663     int dst_stride, src_stride;
   664     int32_t w;
   666     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
   667     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
   669     while (height--)
   670     {
   671 	dst = dst_line;
   672 	dst_line += dst_stride;
   673 	src = src_line;
   674 	src_line += src_stride;
   675 	w = width;
   677 	while (w--)
   678 	{
   679 	    s = *src++;
   680 	    a = s >> 24;
   681 	    if (a)
   682 	    {
   683 		if (a == 0xff)
   684 		    d = s;
   685 		else
   686 		    d = over (s, fetch_24 (dst));
   688 		store_24 (dst, d);
   689 	    }
   690 	    dst += 3;
   691 	}
   692     }
   693 }
   694 #endif
   696 static void
   697 fast_composite_over_8888_0565 (pixman_implementation_t *imp,
   698                                pixman_composite_info_t *info)
   699 {
   700     PIXMAN_COMPOSITE_ARGS (info);
   701     uint16_t    *dst_line, *dst;
   702     uint32_t d;
   703     uint32_t    *src_line, *src, s;
   704     uint8_t a;
   705     int dst_stride, src_stride;
   706     int32_t w;
   708     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
   709     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
   711     while (height--)
   712     {
   713 	dst = dst_line;
   714 	dst_line += dst_stride;
   715 	src = src_line;
   716 	src_line += src_stride;
   717 	w = width;
   719 	while (w--)
   720 	{
   721 	    s = *src++;
   722 	    a = s >> 24;
   723 	    if (s)
   724 	    {
   725 		if (a == 0xff)
   726 		{
   727 		    d = s;
   728 		}
   729 		else
   730 		{
   731 		    d = *dst;
   732 		    d = over (s, convert_0565_to_0888 (d));
   733 		}
   734 		*dst = convert_8888_to_0565 (d);
   735 	    }
   736 	    dst++;
   737 	}
   738     }
   739 }
   741 static void
   742 fast_composite_add_8_8 (pixman_implementation_t *imp,
   743 			pixman_composite_info_t *info)
   744 {
   745     PIXMAN_COMPOSITE_ARGS (info);
   746     uint8_t     *dst_line, *dst;
   747     uint8_t     *src_line, *src;
   748     int dst_stride, src_stride;
   749     int32_t w;
   750     uint8_t s, d;
   751     uint16_t t;
   753     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
   754     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
   756     while (height--)
   757     {
   758 	dst = dst_line;
   759 	dst_line += dst_stride;
   760 	src = src_line;
   761 	src_line += src_stride;
   762 	w = width;
   764 	while (w--)
   765 	{
   766 	    s = *src++;
   767 	    if (s)
   768 	    {
   769 		if (s != 0xff)
   770 		{
   771 		    d = *dst;
   772 		    t = d + s;
   773 		    s = t | (0 - (t >> 8));
   774 		}
   775 		*dst = s;
   776 	    }
   777 	    dst++;
   778 	}
   779     }
   780 }
   782 static void
   783 fast_composite_add_0565_0565 (pixman_implementation_t *imp,
   784                               pixman_composite_info_t *info)
   785 {
   786     PIXMAN_COMPOSITE_ARGS (info);
   787     uint16_t    *dst_line, *dst;
   788     uint32_t	d;
   789     uint16_t    *src_line, *src;
   790     uint32_t	s;
   791     int dst_stride, src_stride;
   792     int32_t w;
   794     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1);
   795     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
   797     while (height--)
   798     {
   799 	dst = dst_line;
   800 	dst_line += dst_stride;
   801 	src = src_line;
   802 	src_line += src_stride;
   803 	w = width;
   805 	while (w--)
   806 	{
   807 	    s = *src++;
   808 	    if (s)
   809 	    {
   810 		d = *dst;
   811 		s = convert_0565_to_8888 (s);
   812 		if (d)
   813 		{
   814 		    d = convert_0565_to_8888 (d);
   815 		    UN8x4_ADD_UN8x4 (s, d);
   816 		}
   817 		*dst = convert_8888_to_0565 (s);
   818 	    }
   819 	    dst++;
   820 	}
   821     }
   822 }
   824 static void
   825 fast_composite_add_8888_8888 (pixman_implementation_t *imp,
   826                               pixman_composite_info_t *info)
   827 {
   828     PIXMAN_COMPOSITE_ARGS (info);
   829     uint32_t    *dst_line, *dst;
   830     uint32_t    *src_line, *src;
   831     int dst_stride, src_stride;
   832     int32_t w;
   833     uint32_t s, d;
   835     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
   836     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
   838     while (height--)
   839     {
   840 	dst = dst_line;
   841 	dst_line += dst_stride;
   842 	src = src_line;
   843 	src_line += src_stride;
   844 	w = width;
   846 	while (w--)
   847 	{
   848 	    s = *src++;
   849 	    if (s)
   850 	    {
   851 		if (s != 0xffffffff)
   852 		{
   853 		    d = *dst;
   854 		    if (d)
   855 			UN8x4_ADD_UN8x4 (s, d);
   856 		}
   857 		*dst = s;
   858 	    }
   859 	    dst++;
   860 	}
   861     }
   862 }
   864 static void
   865 fast_composite_add_n_8_8 (pixman_implementation_t *imp,
   866 			  pixman_composite_info_t *info)
   867 {
   868     PIXMAN_COMPOSITE_ARGS (info);
   869     uint8_t     *dst_line, *dst;
   870     uint8_t     *mask_line, *mask;
   871     int dst_stride, mask_stride;
   872     int32_t w;
   873     uint32_t src;
   874     uint8_t sa;
   876     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
   877     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
   878     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
   879     sa = (src >> 24);
   881     while (height--)
   882     {
   883 	dst = dst_line;
   884 	dst_line += dst_stride;
   885 	mask = mask_line;
   886 	mask_line += mask_stride;
   887 	w = width;
   889 	while (w--)
   890 	{
   891 	    uint16_t tmp;
   892 	    uint16_t a;
   893 	    uint32_t m, d;
   894 	    uint32_t r;
   896 	    a = *mask++;
   897 	    d = *dst;
   899 	    m = MUL_UN8 (sa, a, tmp);
   900 	    r = ADD_UN8 (m, d, tmp);
   902 	    *dst++ = r;
   903 	}
   904     }
   905 }
   907 #ifdef WORDS_BIGENDIAN
   908 #define CREATE_BITMASK(n) (0x80000000 >> (n))
   909 #define UPDATE_BITMASK(n) ((n) >> 1)
   910 #else
   911 #define CREATE_BITMASK(n) (1 << (n))
   912 #define UPDATE_BITMASK(n) ((n) << 1)
   913 #endif
   915 #define TEST_BIT(p, n)					\
   916     (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
   917 #define SET_BIT(p, n)							\
   918     do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0);
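/*
 * TEST_BIT and SET_BIT address pixel n of an a1 bitmap as bit (n & 31) of
 * 32-bit word n >> 5; CREATE_BITMASK/UPDATE_BITMASK hide the bit order,
 * which is MSB-first on big-endian and LSB-first on little-endian hosts,
 * so the loops below work unchanged on both.
 */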
   920 static void
   921 fast_composite_add_1_1 (pixman_implementation_t *imp,
   922 			pixman_composite_info_t *info)
   923 {
   924     PIXMAN_COMPOSITE_ARGS (info);
   925     uint32_t     *dst_line, *dst;
   926     uint32_t     *src_line, *src;
   927     int           dst_stride, src_stride;
   928     int32_t       w;
   930     PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
   931                            src_stride, src_line, 1);
   932     PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t,
   933                            dst_stride, dst_line, 1);
   935     while (height--)
   936     {
   937 	dst = dst_line;
   938 	dst_line += dst_stride;
   939 	src = src_line;
   940 	src_line += src_stride;
   941 	w = width;
   943 	while (w--)
   944 	{
   945 	    /*
   946 	     * TODO: improve performance by processing uint32_t data instead
   947 	     *       of individual bits
   948 	     */
   949 	    if (TEST_BIT (src, src_x + w))
   950 		SET_BIT (dst, dest_x + w);
   951 	}
   952     }
   953 }
   955 static void
   956 fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
   957                               pixman_composite_info_t *info)
   958 {
   959     PIXMAN_COMPOSITE_ARGS (info);
   960     uint32_t     src, srca;
   961     uint32_t    *dst, *dst_line;
   962     uint32_t    *mask, *mask_line;
   963     int          mask_stride, dst_stride;
   964     uint32_t     bitcache, bitmask;
   965     int32_t      w;
   967     if (width <= 0)
   968 	return;
   970     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
   971     srca = src >> 24;
   972     if (src == 0)
   973 	return;
   975     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t,
   976                            dst_stride, dst_line, 1);
   977     PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
   978                            mask_stride, mask_line, 1);
   979     mask_line += mask_x >> 5;
   981     if (srca == 0xff)
   982     {
   983 	while (height--)
   984 	{
   985 	    dst = dst_line;
   986 	    dst_line += dst_stride;
   987 	    mask = mask_line;
   988 	    mask_line += mask_stride;
   989 	    w = width;
   991 	    bitcache = *mask++;
   992 	    bitmask = CREATE_BITMASK (mask_x & 31);
   994 	    while (w--)
   995 	    {
   996 		if (bitmask == 0)
   997 		{
   998 		    bitcache = *mask++;
   999 		    bitmask = CREATE_BITMASK (0);
  1000 		}
  1001 		if (bitcache & bitmask)
  1002 		    *dst = src;
  1003 		bitmask = UPDATE_BITMASK (bitmask);
  1004 		dst++;
  1005 	    }
  1006 	}
  1007     }
  1008     else
  1009     {
  1010 	while (height--)
  1011 	{
  1012 	    dst = dst_line;
  1013 	    dst_line += dst_stride;
  1014 	    mask = mask_line;
  1015 	    mask_line += mask_stride;
  1016 	    w = width;
  1018 	    bitcache = *mask++;
  1019 	    bitmask = CREATE_BITMASK (mask_x & 31);
  1021 	    while (w--)
  1022 	    {
  1023 		if (bitmask == 0)
  1024 		{
  1025 		    bitcache = *mask++;
  1026 		    bitmask = CREATE_BITMASK (0);
  1027 		}
  1028 		if (bitcache & bitmask)
  1029 		    *dst = over (src, *dst);
  1030 		bitmask = UPDATE_BITMASK (bitmask);
  1031 		dst++;
  1032 	    }
  1033 	}
  1034     }
  1035 }
  1037 static void
  1038 fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
  1039                               pixman_composite_info_t *info)
  1040 {
  1041     PIXMAN_COMPOSITE_ARGS (info);
  1042     uint32_t     src, srca;
  1043     uint16_t    *dst, *dst_line;
  1044     uint32_t    *mask, *mask_line;
  1045     int          mask_stride, dst_stride;
  1046     uint32_t     bitcache, bitmask;
  1047     int32_t      w;
  1048     uint32_t     d;
  1049     uint16_t     src565;
  1051     if (width <= 0)
  1052 	return;
  1054     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
  1055     srca = src >> 24;
  1056     if (src == 0)
  1057 	return;
  1059     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t,
  1060                            dst_stride, dst_line, 1);
  1061     PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
  1062                            mask_stride, mask_line, 1);
  1063     mask_line += mask_x >> 5;
  1065     if (srca == 0xff)
  1066     {
  1067 	src565 = convert_8888_to_0565 (src);
  1068 	while (height--)
  1069 	{
  1070 	    dst = dst_line;
  1071 	    dst_line += dst_stride;
  1072 	    mask = mask_line;
  1073 	    mask_line += mask_stride;
  1074 	    w = width;
  1076 	    bitcache = *mask++;
  1077 	    bitmask = CREATE_BITMASK (mask_x & 31);
  1079 	    while (w--)
  1080 	    {
  1081 		if (bitmask == 0)
  1082 		{
  1083 		    bitcache = *mask++;
  1084 		    bitmask = CREATE_BITMASK (0);
  1085 		}
  1086 		if (bitcache & bitmask)
  1087 		    *dst = src565;
  1088 		bitmask = UPDATE_BITMASK (bitmask);
  1089 		dst++;
  1090 	    }
  1091 	}
  1092     }
  1093     else
  1094     {
  1095 	while (height--)
  1096 	{
  1097 	    dst = dst_line;
  1098 	    dst_line += dst_stride;
  1099 	    mask = mask_line;
  1100 	    mask_line += mask_stride;
  1101 	    w = width;
  1103 	    bitcache = *mask++;
  1104 	    bitmask = CREATE_BITMASK (mask_x & 31);
  1106 	    while (w--)
  1107 	    {
  1108 		if (bitmask == 0)
  1109 		{
  1110 		    bitcache = *mask++;
  1111 		    bitmask = CREATE_BITMASK (0);
  1112 		}
  1113 		if (bitcache & bitmask)
  1114 		{
  1115 		    d = over (src, convert_0565_to_0888 (*dst));
  1116 		    *dst = convert_8888_to_0565 (d);
  1117 		}
  1118 		bitmask = UPDATE_BITMASK (bitmask);
  1119 		dst++;
  1120 	    }
  1121 	}
  1122     }
  1123 }
  1125 /*
  1126  * Simple bitblt
  1127  */
  1129 static void
  1130 fast_composite_solid_fill (pixman_implementation_t *imp,
  1131                            pixman_composite_info_t *info)
  1132 {
  1133     PIXMAN_COMPOSITE_ARGS (info);
  1134     uint32_t src;
  1136     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
  1138     if (dest_image->bits.format == PIXMAN_a1)
  1139     {
  1140 	src = src >> 31;
  1141     }
  1142     else if (dest_image->bits.format == PIXMAN_a8)
  1143     {
  1144 	src = src >> 24;
  1145     }
  1146     else if (dest_image->bits.format == PIXMAN_r5g6b5 ||
  1147              dest_image->bits.format == PIXMAN_b5g6r5)
  1148     {
  1149 	src = convert_8888_to_0565 (src);
  1150     }
  1152     pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
  1153                  PIXMAN_FORMAT_BPP (dest_image->bits.format),
  1154                  dest_x, dest_y,
  1155                  width, height,
  1156                  src);
  1157 }
  1159 static void
  1160 fast_composite_src_memcpy (pixman_implementation_t *imp,
  1161 			   pixman_composite_info_t *info)
  1162 {
  1163     PIXMAN_COMPOSITE_ARGS (info);
  1164     int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8;
  1165     uint32_t n_bytes = width * bpp;
  1166     int dst_stride, src_stride;
  1167     uint8_t    *dst;
  1168     uint8_t    *src;
  1170     src_stride = src_image->bits.rowstride * 4;
  1171     dst_stride = dest_image->bits.rowstride * 4;
  1173     src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
  1174     dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
  1176     while (height--)
  1177     {
  1178 	memcpy (dst, src, n_bytes);
  1180 	dst += dst_stride;
  1181 	src += src_stride;
  1182     }
  1183 }
  1185 FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
  1186 FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
  1187 FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
  1188 FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
  1189 FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER)
  1190 FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD)
  1191 FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL)
  1192 FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
  1193 FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
  1194 FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
  1195 FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
  1196 FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
  1197 FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
  1198 FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
  1199 FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
  1200 FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
  1201 FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
  1202 FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
  1203 FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
  1204 FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
  1206 static force_inline void
  1207 scaled_bilinear_scanline_8888_565_OVER (uint16_t *       dst,
  1208                                         const uint32_t * mask,
  1209                                         const uint32_t * src_top,
  1210                                         const uint32_t * src_bottom,
  1211                                         int32_t          w,
  1212                                         int              wt,
  1213                                         int              wb,
  1214                                         pixman_fixed_t   vx,
  1215                                         pixman_fixed_t   unit_x,
  1216                                         pixman_fixed_t   max_vx,
  1217                                         pixman_bool_t    zero_src)
  1218 {
  1219     while ((w -= 1) >= 0)
  1220     {
  1221 	uint32_t tl = src_top [pixman_fixed_to_int (vx)];
  1222 	uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1];
  1223 	uint32_t bl = src_bottom [pixman_fixed_to_int (vx)];
  1224 	uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
  1225 	uint32_t src, result;
  1226 	uint16_t d;
  1227 	d = *dst;
  1228 	src = bilinear_interpolation (tl, tr,
  1229 				      bl, br,
  1230 				      pixman_fixed_to_bilinear_weight(vx),
  1231 				      wb);
  1232 	vx += unit_x;
  1233 	result = over (src, convert_0565_to_0888 (d));
  1234 	*dst++ = convert_8888_to_0565 (result);
  1235     }
  1236 }
  1238 static force_inline void
  1239 scaled_bilinear_scanline_8888_8888_OVER (uint32_t *       dst,
  1240                                          const uint32_t * mask,
  1241                                          const uint32_t * src_top,
  1242                                          const uint32_t * src_bottom,
  1243                                          int32_t          w,
  1244                                          int              wt,
  1245                                          int              wb,
  1246                                          pixman_fixed_t   vx,
  1247                                          pixman_fixed_t   unit_x,
  1248                                          pixman_fixed_t   max_vx,
  1249                                          pixman_bool_t    zero_src)
  1250 {
  1251     while ((w -= 1) >= 0)
  1252     {
  1253 	uint32_t tl = src_top [pixman_fixed_to_int (vx)];
  1254 	uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1];
  1255 	uint32_t bl = src_bottom [pixman_fixed_to_int (vx)];
  1256 	uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
  1257 	uint32_t src;
  1258 	uint32_t d;
  1259 	uint32_t result;
  1260 	d = *dst;
  1261 	src = bilinear_interpolation (tl, tr,
  1262 				      bl, br,
  1263 				      pixman_fixed_to_bilinear_weight(vx),
  1264 				      wb);
  1265 	vx += unit_x;
  1266 	*dst++ = over (src, d);
  1267     }
  1268 }
  1270 #ifndef LOWER_QUALITY_INTERPOLATION
  1272 static force_inline void
  1273 scaled_bilinear_scanline_565_565_SRC (uint16_t *       dst,
  1274 				      const uint32_t * mask,
  1275 				      const uint16_t * src_top,
  1276 				      const uint16_t * src_bottom,
  1277 				      int32_t          w,
  1278 				      int              wt,
  1279 				      int              wb,
  1280 				      pixman_fixed_t   vx,
  1281 				      pixman_fixed_t   unit_x,
  1282 				      pixman_fixed_t   max_vx,
  1283 				      pixman_bool_t    zero_src)
  1284 {
  1285     while ((w -= 1) >= 0)
  1286     {
  1287 	uint16_t tl = src_top [pixman_fixed_to_int (vx)];
  1288 	uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1];
  1289 	uint16_t bl = src_bottom [pixman_fixed_to_int (vx)];
  1290 	uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
  1291 	uint32_t d;
  1292 	d = bilinear_interpolation(convert_0565_to_8888 (tl),
  1293 				   convert_0565_to_8888 (tr),
  1294 				   convert_0565_to_8888 (bl),
  1295 				   convert_0565_to_8888 (br),
  1296 				   pixman_fixed_to_bilinear_weight (vx),
  1297 				   wb);
  1298 	vx += unit_x;
  1299 	*dst++ = convert_8888_to_0565 (d);
  1300     }
  1301 }
  1303 #else
  1305 /* This is a clever low resolution bilinear interpolation inspired by the code
  1306    in Skia */
  1308 /* This takes the green component from the 565 representation and moves it:
  1309    00000000 00000000 rrrrrggg gggbbbbb
  1311    00000ggg ggg00000 rrrrr000 000bbbbb
  1313    This gives us 5 extra bits of space before each component to let us do
  1314    SWAR style optimizations
  1315 */
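/*
 * Worked example of the layout above: 0xffff (white) expands to 0x07e0f81f,
 * i.e. g moves up to bits 21..26 while r (bits 11..15) and b (bits 0..4)
 * stay put, leaving at least five zero bits above every field.
 * bilinear_interpolation_565 below multiplies four expanded pixels by
 * weights that sum to 32, so no field can overflow into its neighbour;
 * a single >> 5 plus compact_rgb_565 then yields the 565 result.
 */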
  1317 #define GREEN_MASK (((1 << 6) - 1) << 5)
  1319 static inline uint32_t
  1320 expand_rgb_565 (uint16_t c) {
  1321     return ((c & GREEN_MASK) << 16) | (c & ~GREEN_MASK);
  1322 }
  1324 static inline uint16_t
  1325 compact_rgb_565 (uint32_t c) {
  1326     return ((c >> 16) & GREEN_MASK) | (c & ~GREEN_MASK);
  1327 }
  1329 static inline uint16_t
  1330 bilinear_interpolation_565(uint16_t tl, uint16_t tr,
  1331 			   uint16_t bl, uint16_t br,
  1332 			   int x, int y)
  1333 {
  1334     int xy;
  1335     uint32_t a00 = expand_rgb_565 (tl);
  1336     uint32_t a01 = expand_rgb_565 (tr);
  1337     uint32_t a10 = expand_rgb_565 (bl);
  1338     uint32_t a11 = expand_rgb_565 (br);
  1340     xy = (x * y) >> 3;
  1341     return compact_rgb_565 ((a00 * (32 - 2*y - 2*x + xy) +
  1342 			     a01 * (2*x - xy) +
  1343 			     a10 * (2*y - xy) +
  1344 			     a11 * xy) >> 5);
  1345 }
  1347 static force_inline void
  1348 scaled_bilinear_scanline_565_565_SRC (uint16_t *       dst,
  1349 				      const uint32_t * mask,
  1350 				      const uint16_t * src_top,
  1351 				      const uint16_t * src_bottom,
  1352 				      int32_t          w,
  1353 				      int              wt,
  1354 				      int              wb,
  1355 				      pixman_fixed_t   vx,
  1356 				      pixman_fixed_t   unit_x,
  1357 				      pixman_fixed_t   max_vx,
  1358 				      pixman_bool_t    zero_src)
  1359 {
  1360     while ((w -= 1) >= 0)
  1361     {
  1362 	uint16_t tl = src_top [pixman_fixed_to_int (vx)];
  1363 	uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1];
  1364 	uint16_t bl = src_bottom [pixman_fixed_to_int (vx)];
  1365 	uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
  1367         uint16_t d = bilinear_interpolation_565 (tl, tr, bl, br,
  1368 						 pixman_fixed_to_bilinear_weight(vx),
  1369 						 wb);
  1370         vx += unit_x;
  1371         *dst++ = d;
  1372     }
  1373 }
  1375 #endif
  1377 FAST_BILINEAR_MAINLOOP_COMMON (565_565_cover_SRC,
  1378 			       scaled_bilinear_scanline_565_565_SRC, NULL,
  1379 			       uint16_t, uint32_t, uint16_t,
  1380 			       COVER, FLAG_NONE)
  1381 FAST_BILINEAR_MAINLOOP_COMMON (565_565_pad_SRC,
  1382 			       scaled_bilinear_scanline_565_565_SRC, NULL,
  1383 			       uint16_t, uint32_t, uint16_t,
  1384 			       PAD, FLAG_NONE)
  1385 FAST_BILINEAR_MAINLOOP_COMMON (565_565_none_SRC,
  1386 			       scaled_bilinear_scanline_565_565_SRC, NULL,
  1387 			       uint16_t, uint32_t, uint16_t,
  1388 			       NONE, FLAG_NONE)
  1389 FAST_BILINEAR_MAINLOOP_COMMON (565_565_normal_SRC,
  1390 			       scaled_bilinear_scanline_565_565_SRC, NULL,
  1391 			       uint16_t, uint32_t, uint16_t,
  1392 			       NORMAL, FLAG_NONE)
  1394 FAST_BILINEAR_MAINLOOP_COMMON (8888_565_cover_OVER,
  1395 			       scaled_bilinear_scanline_8888_565_OVER, NULL,
  1396 			       uint32_t, uint32_t, uint16_t,
  1397 			       COVER, FLAG_NONE)
  1398 FAST_BILINEAR_MAINLOOP_COMMON (8888_565_pad_OVER,
  1399 			       scaled_bilinear_scanline_8888_565_OVER, NULL,
  1400 			       uint32_t, uint32_t, uint16_t,
  1401 			       PAD, FLAG_NONE)
  1402 FAST_BILINEAR_MAINLOOP_COMMON (8888_565_none_OVER,
  1403 			       scaled_bilinear_scanline_8888_565_OVER, NULL,
  1404 			       uint32_t, uint32_t, uint16_t,
  1405 			       NONE, FLAG_NONE)
  1406 FAST_BILINEAR_MAINLOOP_COMMON (8888_565_normal_OVER,
  1407 			       scaled_bilinear_scanline_8888_565_OVER, NULL,
  1408 			       uint32_t, uint32_t, uint16_t,
  1409 			       NORMAL, FLAG_NONE)
  1411 FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_cover_OVER,
  1412 			       scaled_bilinear_scanline_8888_8888_OVER, NULL,
  1413 			       uint32_t, uint32_t, uint32_t,
  1414 			       COVER, FLAG_NONE)
  1415 FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_pad_OVER,
  1416 			       scaled_bilinear_scanline_8888_8888_OVER, NULL,
  1417 			       uint32_t, uint32_t, uint32_t,
  1418 			       PAD, FLAG_NONE)
  1419 FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_none_OVER,
  1420 			       scaled_bilinear_scanline_8888_8888_OVER, NULL,
  1421 			       uint32_t, uint32_t, uint32_t,
  1422 			       NONE, FLAG_NONE)
  1423 FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_normal_OVER,
  1424 			       scaled_bilinear_scanline_8888_8888_OVER, NULL,
  1425 			       uint32_t, uint32_t, uint32_t,
  1426 			       NORMAL, FLAG_NONE)
  1428 #define REPEAT_MIN_WIDTH    32
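/*
 * The path below handles NORMAL (tiled) repeat by compositing one scanline
 * segment at a time with the repeat flag dropped from src_flags; sources
 * narrower than REPEAT_MIN_WIDTH pixels are first replicated into the
 * extended_src stack buffer so that each per-segment call still covers a
 * reasonable number of pixels.
 */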
  1430 static void
  1431 fast_composite_tiled_repeat (pixman_implementation_t *imp,
  1432 			     pixman_composite_info_t *info)
  1433 {
  1434     PIXMAN_COMPOSITE_ARGS (info);
  1435     pixman_composite_func_t func;
  1436     pixman_format_code_t mask_format;
  1437     uint32_t src_flags, mask_flags;
  1438     int32_t sx, sy;
  1439     int32_t width_remain;
  1440     int32_t num_pixels;
  1441     int32_t src_width;
  1442     int32_t i, j;
  1443     pixman_image_t extended_src_image;
  1444     uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
  1445     pixman_bool_t need_src_extension;
  1446     uint32_t *src_line;
  1447     int32_t src_stride;
  1448     int32_t src_bpp;
  1449     pixman_composite_info_t info2 = *info;
  1451     src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |
  1452 		    FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
  1454     if (mask_image)
  1455     {
  1456 	mask_format = mask_image->common.extended_format_code;
  1457 	mask_flags = info->mask_flags;
  1458     }
  1459     else
  1460     {
  1461 	mask_format = PIXMAN_null;
  1462 	mask_flags = FAST_PATH_IS_OPAQUE;
  1463     }
  1465     _pixman_implementation_lookup_composite (
  1466 	imp->toplevel, info->op,
  1467 	src_image->common.extended_format_code, src_flags,
  1468 	mask_format, mask_flags,
  1469 	dest_image->common.extended_format_code, info->dest_flags,
  1470 	&imp, &func);
  1472     src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);
  1474     if (src_image->bits.width < REPEAT_MIN_WIDTH		&&
  1475 	(src_bpp == 32 || src_bpp == 16 || src_bpp == 8)	&&
  1476 	!src_image->bits.indexed)
  1477     {
  1478 	sx = src_x;
  1479 	sx = MOD (sx, src_image->bits.width);
  1480 	sx += width;
  1481 	src_width = 0;
  1483 	while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
  1484 	    src_width += src_image->bits.width;
  1486 	src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);
  1488 	/* Initialize/validate stack-allocated temporary image */
  1489 	_pixman_bits_image_init (&extended_src_image, src_image->bits.format,
  1490 				 src_width, 1, &extended_src[0], src_stride,
  1491 				 FALSE);
  1492 	_pixman_image_validate (&extended_src_image);
  1494 	info2.src_image = &extended_src_image;
  1495 	need_src_extension = TRUE;
  1496     }
  1497     else
  1498     {
  1499 	src_width = src_image->bits.width;
  1500 	need_src_extension = FALSE;
  1501     }
  1503     sx = src_x;
  1504     sy = src_y;
  1506     while (--height >= 0)
  1507     {
  1508 	sx = MOD (sx, src_width);
  1509 	sy = MOD (sy, src_image->bits.height);
  1511 	if (need_src_extension)
  1512 	{
  1513 	    if (src_bpp == 32)
  1514 	    {
  1515 		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);
  1517 		for (i = 0; i < src_width; )
  1518 		{
  1519 		    for (j = 0; j < src_image->bits.width; j++, i++)
  1520 			extended_src[i] = src_line[j];
  1521 		}
  1522 	    }
  1523 	    else if (src_bpp == 16)
  1524 	    {
  1525 		uint16_t *src_line_16;
  1527 		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
  1528 				       src_line_16, 1);
  1529 		src_line = (uint32_t*)src_line_16;
  1531 		for (i = 0; i < src_width; )
  1532 		{
  1533 		    for (j = 0; j < src_image->bits.width; j++, i++)
  1534 			((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
  1535 		}
  1536 	    }
  1537 	    else if (src_bpp == 8)
  1538 	    {
  1539 		uint8_t *src_line_8;
  1541 		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
  1542 				       src_line_8, 1);
  1543 		src_line = (uint32_t*)src_line_8;
  1545 		for (i = 0; i < src_width; )
  1546 		{
  1547 		    for (j = 0; j < src_image->bits.width; j++, i++)
  1548 			((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
  1549 		}
  1550 	    }
  1552 	    info2.src_y = 0;
  1553 	}
  1554 	else
  1555 	{
  1556 	    info2.src_y = sy;
  1557 	}
  1559 	width_remain = width;
  1561 	while (width_remain > 0)
  1562 	{
  1563 	    num_pixels = src_width - sx;
  1565 	    if (num_pixels > width_remain)
  1566 		num_pixels = width_remain;
  1568 	    info2.src_x = sx;
  1569 	    info2.width = num_pixels;
  1570 	    info2.height = 1;
  1572 	    func (imp, &info2);
  1574 	    width_remain -= num_pixels;
  1575 	    info2.mask_x += num_pixels;
  1576 	    info2.dest_x += num_pixels;
  1577 	    sx = 0;
  1578 	}
  1580 	sx = src_x;
  1581 	sy++;
  1582 	info2.mask_x = info->mask_x;
  1583 	info2.mask_y++;
  1584 	info2.dest_x = info->dest_x;
  1585 	info2.dest_y++;
  1586     }
  1588     if (need_src_extension)
  1589 	_pixman_image_fini (&extended_src_image);
  1590 }
  1592 /* Use more unrolling for src_0565_0565 because it is typically CPU bound */
  1593 static force_inline void
  1594 scaled_nearest_scanline_565_565_SRC (uint16_t *       dst,
  1595 				     const uint16_t * src,
  1596 				     int32_t          w,
  1597 				     pixman_fixed_t   vx,
  1598 				     pixman_fixed_t   unit_x,
  1599 				     pixman_fixed_t   max_vx,
  1600 				     pixman_bool_t    fully_transparent_src)
  1601 {
  1602     uint16_t tmp1, tmp2, tmp3, tmp4;
  1603     while ((w -= 4) >= 0)
  1604     {
  1605 	tmp1 = *(src + pixman_fixed_to_int (vx));
  1606 	vx += unit_x;
  1607 	tmp2 = *(src + pixman_fixed_to_int (vx));
  1608 	vx += unit_x;
  1609 	tmp3 = *(src + pixman_fixed_to_int (vx));
  1610 	vx += unit_x;
  1611 	tmp4 = *(src + pixman_fixed_to_int (vx));
  1612 	vx += unit_x;
  1613 	*dst++ = tmp1;
  1614 	*dst++ = tmp2;
  1615 	*dst++ = tmp3;
  1616 	*dst++ = tmp4;
  1617     }
  1618     if (w & 2)
  1619     {
  1620 	tmp1 = *(src + pixman_fixed_to_int (vx));
  1621 	vx += unit_x;
  1622 	tmp2 = *(src + pixman_fixed_to_int (vx));
  1623 	vx += unit_x;
  1624 	*dst++ = tmp1;
  1625 	*dst++ = tmp2;
  1626     }
  1627     if (w & 1)
  1628 	*dst = *(src + pixman_fixed_to_int (vx));
  1629 }
  1631 FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
  1632 		       scaled_nearest_scanline_565_565_SRC,
  1633 		       uint16_t, uint16_t, COVER)
  1634 FAST_NEAREST_MAINLOOP (565_565_none_SRC,
  1635 		       scaled_nearest_scanline_565_565_SRC,
  1636 		       uint16_t, uint16_t, NONE)
  1637 FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
  1638 		       scaled_nearest_scanline_565_565_SRC,
  1639 		       uint16_t, uint16_t, PAD)
  1641 static force_inline uint32_t
  1642 fetch_nearest (pixman_repeat_t src_repeat,
  1643 	       pixman_format_code_t format,
  1644 	       uint32_t *src, int x, int src_width)
  1645 {
  1646     if (repeat (src_repeat, &x, src_width))
  1647     {
  1648 	if (format == PIXMAN_x8r8g8b8 || format == PIXMAN_x8b8g8r8)
  1649 	    return *(src + x) | 0xff000000;
  1650 	else
  1651 	    return *(src + x);
  1652     }
  1653     else
  1654     {
  1655 	return 0;
  1656     }
  1657 }
  1659 static force_inline void
  1660 combine_over (uint32_t s, uint32_t *dst)
  1661 {
  1662     if (s)
  1663     {
  1664 	uint8_t ia = 0xff - (s >> 24);
  1666 	if (ia)
  1667 	    UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
  1668 	else
  1669 	    *dst = s;
  1670     }
  1671 }
  1673 static force_inline void
  1674 combine_src (uint32_t s, uint32_t *dst)
  1675 {
  1676     *dst = s;
  1677 }
  1679 static void
  1680 fast_composite_scaled_nearest (pixman_implementation_t *imp,
  1681 			       pixman_composite_info_t *info)
  1682 {
  1683     PIXMAN_COMPOSITE_ARGS (info);
  1684     uint32_t       *dst_line;
  1685     uint32_t       *src_line;
  1686     int             dst_stride, src_stride;
  1687     int		    src_width, src_height;
  1688     pixman_repeat_t src_repeat;
  1689     pixman_fixed_t unit_x, unit_y;
  1690     pixman_format_code_t src_format;
  1691     pixman_vector_t v;
  1692     pixman_fixed_t vy;
  1694     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
  1695     /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
  1696      * transformed from destination space to source space
  1697      */
  1698     PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);
  1700     /* reference point is the center of the pixel */
  1701     v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
  1702     v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
  1703     v.vector[2] = pixman_fixed_1;
  1705     if (!pixman_transform_point_3d (src_image->common.transform, &v))
  1706 	return;
  1708     unit_x = src_image->common.transform->matrix[0][0];
  1709     unit_y = src_image->common.transform->matrix[1][1];
  1711     /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
  1712     v.vector[0] -= pixman_fixed_e;
  1713     v.vector[1] -= pixman_fixed_e;
  1715     src_height = src_image->bits.height;
  1716     src_width = src_image->bits.width;
  1717     src_repeat = src_image->common.repeat;
  1718     src_format = src_image->bits.format;
  1720     vy = v.vector[1];
  1721     while (height--)
  1722     {
  1723         pixman_fixed_t vx = v.vector[0];
  1724 	int y = pixman_fixed_to_int (vy);
  1725 	uint32_t *dst = dst_line;
  1727 	dst_line += dst_stride;
  1729         /* adjust the y location by a unit vector in the y direction
  1730          * this is equivalent to transforming y+1 of the destination point to source space */
  1731         vy += unit_y;
  1733 	if (!repeat (src_repeat, &y, src_height))
  1734 	{
  1735 	    if (op == PIXMAN_OP_SRC)
  1736 		memset (dst, 0, sizeof (*dst) * width);
  1737 	}
  1738 	else
  1739 	{
  1740 	    int w = width;
  1742 	    uint32_t *src = src_line + y * src_stride;
  1744 	    while (w >= 2)
  1745 	    {
  1746 		uint32_t s1, s2;
  1747 		int x1, x2;
  1749 		x1 = pixman_fixed_to_int (vx);
  1750 		vx += unit_x;
  1752 		x2 = pixman_fixed_to_int (vx);
  1753 		vx += unit_x;
  1755 		w -= 2;
  1757 		s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
  1758 		s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);
  1760 		if (op == PIXMAN_OP_OVER)
  1761 		{
  1762 		    combine_over (s1, dst++);
  1763 		    combine_over (s2, dst++);
  1764 		}
  1765 		else
  1766 		{
  1767 		    combine_src (s1, dst++);
  1768 		    combine_src (s2, dst++);
  1769 		}
  1770 	    }
  1772 	    while (w--)
  1773 	    {
  1774 		uint32_t s;
  1775 		int x;
  1777 		x = pixman_fixed_to_int (vx);
  1778 		vx += unit_x;
  1780 		s = fetch_nearest (src_repeat, src_format, src, x, src_width);
  1782 		if (op == PIXMAN_OP_OVER)
  1783 		    combine_over (s, dst++);
  1784 		else
  1785 		    combine_src (s, dst++);
  1786 	    }
  1787 	}
  1788     }
  1789 }
  1791 #define CACHE_LINE_SIZE 64
  1793 #define FAST_SIMPLE_ROTATE(suffix, pix_type)                                  \
  1795 static void                                                                   \
  1796 blt_rotated_90_trivial_##suffix (pix_type       *dst,                         \
  1797 				 int             dst_stride,                  \
  1798 				 const pix_type *src,                         \
  1799 				 int             src_stride,                  \
  1800 				 int             w,                           \
  1801 				 int             h)                           \
  1802 {                                                                             \
  1803     int x, y;                                                                 \
  1804     for (y = 0; y < h; y++)                                                   \
  1805     {                                                                         \
  1806 	const pix_type *s = src + (h - y - 1);                                \
  1807 	pix_type *d = dst + dst_stride * y;                                   \
  1808 	for (x = 0; x < w; x++)                                               \
  1809 	{                                                                     \
  1810 	    *d++ = *s;                                                        \
  1811 	    s += src_stride;                                                  \
  1812 	}                                                                     \
  1813     }                                                                         \
  1814 }                                                                             \
  1816 static void                                                                   \
  1817 blt_rotated_270_trivial_##suffix (pix_type       *dst,                        \
  1818 				  int             dst_stride,                 \
  1819 				  const pix_type *src,                        \
  1820 				  int             src_stride,                 \
  1821 				  int             w,                          \
  1822 				  int             h)                          \
  1823 {                                                                             \
  1824     int x, y;                                                                 \
  1825     for (y = 0; y < h; y++)                                                   \
  1826     {                                                                         \
  1827 	const pix_type *s = src + src_stride * (w - 1) + y;                   \
  1828 	pix_type *d = dst + dst_stride * y;                                   \
  1829 	for (x = 0; x < w; x++)                                               \
  1830 	{                                                                     \
  1831 	    *d++ = *s;                                                        \
  1832 	    s -= src_stride;                                                  \
  1833 	}                                                                     \
  1834     }                                                                         \
  1835 }                                                                             \
  1837 static void                                                                   \
  1838 blt_rotated_90_##suffix (pix_type       *dst,                                 \
  1839 			 int             dst_stride,                          \
  1840 			 const pix_type *src,                                 \
  1841 			 int             src_stride,                          \
  1842 			 int             W,                                   \
  1843 			 int             H)                                   \
  1844 {                                                                             \
  1845     int x;                                                                    \
  1846     int leading_pixels = 0, trailing_pixels = 0;                              \
  1847     const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
  1849     /*                                                                        \
  1850      * split processing into handling destination as TILE_SIZExH cache line   \
  1851      * aligned vertical stripes (optimistically assuming that destination     \
  1852      * stride is a multiple of cache line, if not - it will be just a bit     \
  1853      * slower)                                                                \
  1854      */                                                                       \
  1856     if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
  1857     {                                                                         \
  1858 	leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
  1859 			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
  1860 	if (leading_pixels > W)                                               \
  1861 	    leading_pixels = W;                                               \
  1863 	/* unaligned leading part NxH (where N < TILE_SIZE) */                \
  1864 	blt_rotated_90_trivial_##suffix (                                     \
  1865 	    dst,                                                              \
  1866 	    dst_stride,                                                       \
  1867 	    src,                                                              \
  1868 	    src_stride,                                                       \
  1869 	    leading_pixels,                                                   \
  1870 	    H);                                                               \
  1872 	dst += leading_pixels;                                                \
  1873 	src += leading_pixels * src_stride;                                   \
  1874 	W -= leading_pixels;                                                  \
  1875     }                                                                         \
  1877     if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
  1878     {                                                                         \
  1879 	trailing_pixels = (((uintptr_t)(dst + W) &                            \
  1880 			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
  1881 	if (trailing_pixels > W)                                              \
  1882 	    trailing_pixels = W;                                              \
  1883 	W -= trailing_pixels;                                                 \
  1884     }                                                                         \
  1886     for (x = 0; x < W; x += TILE_SIZE)                                        \
  1887     {                                                                         \
  1888 	/* aligned middle part TILE_SIZExH */                                 \
  1889 	blt_rotated_90_trivial_##suffix (                                     \
  1890 	    dst + x,                                                          \
  1891 	    dst_stride,                                                       \
  1892 	    src + src_stride * x,                                             \
  1893 	    src_stride,                                                       \
  1894 	    TILE_SIZE,                                                        \
  1895 	    H);                                                               \
  1896     }                                                                         \
  1898     if (trailing_pixels)                                                      \
  1899     {                                                                         \
  1900 	/* unaligned trailing part NxH (where N < TILE_SIZE) */               \
  1901 	blt_rotated_90_trivial_##suffix (                                     \
  1902 	    dst + W,                                                          \
  1903 	    dst_stride,                                                       \
  1904 	    src + W * src_stride,                                             \
  1905 	    src_stride,                                                       \
  1906 	    trailing_pixels,                                                  \
  1907 	    H);                                                               \
  1908     }                                                                         \
  1909 }                                                                             \
  1911 static void                                                                   \
  1912 blt_rotated_270_##suffix (pix_type       *dst,                                \
  1913 			  int             dst_stride,                         \
  1914 			  const pix_type *src,                                \
  1915 			  int             src_stride,                         \
  1916 			  int             W,                                  \
  1917 			  int             H)                                  \
  1918 {                                                                             \
  1919     int x;                                                                    \
  1920     int leading_pixels = 0, trailing_pixels = 0;                              \
  1921     const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
  1923     /*                                                                        \
  1924      * split processing into handling destination as TILE_SIZExH cache line   \
  1925      * aligned vertical stripes (optimistically assuming that destination     \
  1926      * stride is a multiple of cache line, if not - it will be just a bit     \
  1927      * slower)                                                                \
  1928      */                                                                       \
  1930     if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
  1931     {                                                                         \
  1932 	leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
  1933 			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
  1934 	if (leading_pixels > W)                                               \
  1935 	    leading_pixels = W;                                               \
  1937 	/* unaligned leading part NxH (where N < TILE_SIZE) */                \
  1938 	blt_rotated_270_trivial_##suffix (                                    \
  1939 	    dst,                                                              \
  1940 	    dst_stride,                                                       \
  1941 	    src + src_stride * (W - leading_pixels),                          \
  1942 	    src_stride,                                                       \
  1943 	    leading_pixels,                                                   \
  1944 	    H);                                                               \
  1946 	dst += leading_pixels;                                                \
  1947 	W -= leading_pixels;                                                  \
  1948     }                                                                         \
  1950     if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
  1951     {                                                                         \
  1952 	trailing_pixels = (((uintptr_t)(dst + W) &                            \
  1953 			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
  1954 	if (trailing_pixels > W)                                              \
  1955 	    trailing_pixels = W;                                              \
  1956 	W -= trailing_pixels;                                                 \
  1957 	src += trailing_pixels * src_stride;                                  \
  1958     }                                                                         \
  1960     for (x = 0; x < W; x += TILE_SIZE)                                        \
  1961     {                                                                         \
  1962 	/* aligned middle part TILE_SIZExH */                                 \
  1963 	blt_rotated_270_trivial_##suffix (                                    \
  1964 	    dst + x,                                                          \
  1965 	    dst_stride,                                                       \
  1966 	    src + src_stride * (W - x - TILE_SIZE),                           \
  1967 	    src_stride,                                                       \
  1968 	    TILE_SIZE,                                                        \
  1969 	    H);                                                               \
  1970     }                                                                         \
  1972     if (trailing_pixels)                                                      \
  1973     {                                                                         \
  1974 	/* unaligned trailing part NxH (where N < TILE_SIZE) */               \
  1975 	blt_rotated_270_trivial_##suffix (                                    \
  1976 	    dst + W,                                                          \
  1977 	    dst_stride,                                                       \
  1978 	    src - trailing_pixels * src_stride,                               \
  1979 	    src_stride,                                                       \
  1980 	    trailing_pixels,                                                  \
  1981 	    H);                                                               \
  1982     }                                                                         \
  1983 }                                                                             \
  1985 static void                                                                   \
  1986 fast_composite_rotate_90_##suffix (pixman_implementation_t *imp,              \
  1987 				   pixman_composite_info_t *info)	      \
  1988 {									      \
  1989     PIXMAN_COMPOSITE_ARGS (info);					      \
  1990     pix_type       *dst_line;						      \
  1991     pix_type       *src_line;                                                 \
  1992     int             dst_stride, src_stride;                                   \
  1993     int             src_x_t, src_y_t;                                         \
  1995     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
  1996 			   dst_stride, dst_line, 1);                          \
  1997     src_x_t = -src_y + pixman_fixed_to_int (                                  \
  1998 				src_image->common.transform->matrix[0][2] +   \
  1999 				pixman_fixed_1 / 2 - pixman_fixed_e) - height;\
  2000     src_y_t = src_x + pixman_fixed_to_int (                                   \
  2001 				src_image->common.transform->matrix[1][2] +   \
  2002 				pixman_fixed_1 / 2 - pixman_fixed_e);         \
  2003     PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
  2004 			   src_stride, src_line, 1);                          \
  2005     blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride,      \
  2006 			     width, height);                                  \
  2007 }                                                                             \
  2009 static void                                                                   \
  2010 fast_composite_rotate_270_##suffix (pixman_implementation_t *imp,             \
  2011 				    pixman_composite_info_t *info)            \
  2012 {                                                                             \
  2013     PIXMAN_COMPOSITE_ARGS (info);					      \
  2014     pix_type       *dst_line;						      \
  2015     pix_type       *src_line;                                                 \
  2016     int             dst_stride, src_stride;                                   \
  2017     int             src_x_t, src_y_t;                                         \
  2019     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
  2020 			   dst_stride, dst_line, 1);                          \
  2021     src_x_t = src_y + pixman_fixed_to_int (                                   \
  2022 				src_image->common.transform->matrix[0][2] +   \
  2023 				pixman_fixed_1 / 2 - pixman_fixed_e);         \
  2024     src_y_t = -src_x + pixman_fixed_to_int (                                  \
  2025 				src_image->common.transform->matrix[1][2] +   \
  2026 				pixman_fixed_1 / 2 - pixman_fixed_e) - width; \
  2027     PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
  2028 			   src_stride, src_line, 1);                          \
  2029     blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride,     \
  2030 			      width, height);                                 \
  2031 }
  2033 FAST_SIMPLE_ROTATE (8, uint8_t)
  2034 FAST_SIMPLE_ROTATE (565, uint16_t)
  2035 FAST_SIMPLE_ROTATE (8888, uint32_t)
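/*
 * Each FAST_SIMPLE_ROTATE instantiation above expands into per-format
 * helpers (blt_rotated_90/270_trivial_* and blt_rotated_90/270_*) plus the
 * composite entry points fast_composite_rotate_90/270_*.  The "trivial"
 * variants copy one destination row at a time, stepping through the source
 * by whole strides; the non-trivial variants carve the destination into a
 * possibly unaligned leading stripe, cache-line-aligned stripes of
 * TILE_SIZE pixels and a trailing stripe, as described by the comment
 * inside the macro.
 */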
  2037 static const pixman_fast_path_t c_fast_paths[] =
  2038 {
  2039     PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
  2040     PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
  2041     PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
  2042     PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
  2043     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
  2044     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
  2045     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
  2046     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
  2047     PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
  2048     PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
  2049     PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
  2050     PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
  2051     PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5,   fast_composite_over_n_1_0565),
  2052     PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5,   fast_composite_over_n_1_0565),
  2053     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
  2054     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
  2055     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
  2056     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
  2057     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
  2058     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
  2059     PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
  2060     PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
  2061     PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
  2062     PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
  2063     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
  2064     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
  2065     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
  2066     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
  2067     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
  2068     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
  2069     PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, fast_composite_add_0565_0565),
  2070     PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, fast_composite_add_0565_0565),
  2071     PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
  2072     PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
  2073     PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
  2074     PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1_1),
  2075     PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
  2076     PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
  2077     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
  2078     PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
  2079     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
  2080     PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
  2081     PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
  2082     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
  2083     PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
  2084     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
  2085     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
  2086     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
  2087     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
  2088     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
  2089     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
  2090     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
  2091     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
  2092     PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
  2093     PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
  2094     PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
  2095     PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
  2096     PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
  2097     PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
  2098     PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
  2099     PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
  2100     PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
  2101     PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
  2102     PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
  2103     PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
  2105     SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
  2106     SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
  2107     SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
  2108     SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),
  2110     SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
  2111     SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),
  2113     SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
  2114     SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),
  2116     SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
  2118     SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
  2119     SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
  2120     SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
  2121     SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
  2122     SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
  2123     SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
  2125     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
  2126     SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
  2127     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
  2128     SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),
  2130     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
  2132 #define NEAREST_FAST_PATH(op,s,d)		\
  2133     {   PIXMAN_OP_ ## op,			\
  2134 	PIXMAN_ ## s, SCALED_NEAREST_FLAGS,	\
  2135 	PIXMAN_null, 0,				\
  2136 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,	\
  2137 	fast_composite_scaled_nearest,		\
  2138     }
  2140     NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
  2141     NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
  2142     NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
  2143     NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),
  2145     NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
  2146     NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
  2147     NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
  2148     NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),
  2150     NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
  2151     NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
  2152     NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
  2153     NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),
  2155     NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
  2156     NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
  2157     NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
  2158     NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),
  2160 #define SIMPLE_ROTATE_FLAGS(angle)					  \
  2161     (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM	|			  \
  2162      FAST_PATH_NEAREST_FILTER			|			  \
  2163      FAST_PATH_SAMPLES_COVER_CLIP_NEAREST	|			  \
  2164      FAST_PATH_STANDARD_FLAGS)
  2166 #define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix)				  \
  2167     {   PIXMAN_OP_ ## op,						  \
  2168 	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90),				  \
  2169 	PIXMAN_null, 0,							  \
  2170 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				  \
  2171 	fast_composite_rotate_90_##suffix,				  \
  2172     },									  \
  2173     {   PIXMAN_OP_ ## op,						  \
  2174 	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270),			  \
  2175 	PIXMAN_null, 0,							  \
  2176 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				  \
  2177 	fast_composite_rotate_270_##suffix,				  \
  2178     }
  2180     SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
  2181     SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
  2182     SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
  2183     SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
  2184     SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),
  2186     /* Simple repeat fast path entry. */
  2187     {	PIXMAN_OP_any,
  2188 	PIXMAN_any,
  2189 	(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE |
  2190 	 FAST_PATH_NORMAL_REPEAT),
  2191 	PIXMAN_any, 0,
  2192 	PIXMAN_any, FAST_PATH_STD_DEST_FLAGS,
  2193 	fast_composite_tiled_repeat
  2194     },
  2196     SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
  2197     SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
  2198     SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
  2200     {   PIXMAN_OP_NONE	},
  2201 };
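/*
 * c_fast_paths is the table handed to _pixman_implementation_create () at
 * the bottom of this file.  Each entry names an operator, the source, mask
 * and destination formats, and the flags a composite request has to
 * satisfy; the generic lookup code scans such tables in order and uses the
 * first matching entry, with { PIXMAN_OP_NONE } marking the end.
 */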
  2203 #ifdef WORDS_BIGENDIAN
  2204 #define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n)))
  2205 #else
  2206 #define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs))
  2207 #endif
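/*
 * A1_FILL_MASK (n, offs) yields a 32-bit word with n consecutive bits set
 * starting at bit offset offs (counted from the other end of the word on
 * big-endian).  For example, on little-endian A1_FILL_MASK (3, 4) is
 * ((1 << 3) - 1) << 4 == 0x70.  pixman_fill1_line () uses it to set or
 * clear the partial words at either end of an a1 span.
 */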
  2209 static force_inline void
  2210 pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
  2211 {
  2212     if (offs)
  2213     {
  2214 	int leading_pixels = 32 - offs;
  2215 	if (leading_pixels >= width)
  2216 	{
  2217 	    if (v)
  2218 		*dst |= A1_FILL_MASK (width, offs);
  2219 	    else
  2220 		*dst &= ~A1_FILL_MASK (width, offs);
  2221 	    return;
  2222 	}
  2223 	else
  2224 	{
  2225 	    if (v)
  2226 		*dst++ |= A1_FILL_MASK (leading_pixels, offs);
  2227 	    else
  2228 		*dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
  2229 	    width -= leading_pixels;
  2230 	}
  2231     }
  2232     while (width >= 32)
  2233     {
  2234 	if (v)
  2235 	    *dst++ = 0xFFFFFFFF;
  2236 	else
  2237 	    *dst++ = 0;
  2238 	width -= 32;
  2239     }
  2240     if (width > 0)
  2241     {
  2242 	if (v)
  2243 	    *dst |= A1_FILL_MASK (width, 0);
  2244 	else
  2245 	    *dst &= ~A1_FILL_MASK (width, 0);
  2246     }
  2247 }
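/*
 * For the 1 bpp case below, `stride' is measured in uint32_t words and `x'
 * is a bit position, hence the `x >> 5' word index and `x & 31' bit offset;
 * only the least significant bit of `filler' matters.
 */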
  2249 static void
  2250 pixman_fill1 (uint32_t *bits,
  2251               int       stride,
  2252               int       x,
  2253               int       y,
  2254               int       width,
  2255               int       height,
  2256               uint32_t  filler)
  2257 {
  2258     uint32_t *dst = bits + y * stride + (x >> 5);
  2259     int offs = x & 31;
  2261     if (filler & 1)
  2262     {
  2263 	while (height--)
  2264 	{
  2265 	    pixman_fill1_line (dst, offs, width, 1);
  2266 	    dst += stride;
  2267 	}
  2268     }
  2269     else
  2270     {
  2271 	while (height--)
  2272 	{
  2273 	    pixman_fill1_line (dst, offs, width, 0);
  2274 	    dst += stride;
  2275 	}
  2276     }
  2277 }
  2279 static void
  2280 pixman_fill8 (uint32_t *bits,
  2281               int       stride,
  2282               int       x,
  2283               int       y,
  2284               int       width,
  2285               int       height,
  2286               uint32_t  filler)
  2287 {
  2288     int byte_stride = stride * (int) sizeof (uint32_t);
  2289     uint8_t *dst = (uint8_t *) bits;
  2290     uint8_t v = filler & 0xff;
  2291     int i;
  2293     dst = dst + y * byte_stride + x;
  2295     while (height--)
  2296     {
  2297 	for (i = 0; i < width; ++i)
  2298 	    dst[i] = v;
  2300 	dst += byte_stride;
  2301     }
  2302 }
  2304 static void
  2305 pixman_fill16 (uint32_t *bits,
  2306                int       stride,
  2307                int       x,
  2308                int       y,
  2309                int       width,
  2310                int       height,
  2311                uint32_t  filler)
  2312 {
  2313     int short_stride =
  2314 	(stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
  2315     uint16_t *dst = (uint16_t *)bits;
  2316     uint16_t v = filler & 0xffff;
  2317     int i;
  2319     dst = dst + y * short_stride + x;
  2321     while (height--)
  2322     {
  2323 	for (i = 0; i < width; ++i)
  2324 	    dst[i] = v;
  2326 	dst += short_stride;
  2327     }
  2328 }
  2330 static void
  2331 pixman_fill32 (uint32_t *bits,
  2332                int       stride,
  2333                int       x,
  2334                int       y,
  2335                int       width,
  2336                int       height,
  2337                uint32_t  filler)
  2338 {
  2339     int i;
  2341     bits = bits + y * stride + x;
  2343     while (height--)
  2344     {
  2345 	for (i = 0; i < width; ++i)
  2346 	    bits[i] = filler;
  2348 	bits += stride;
  2349     }
  2350 }
  2352 static pixman_bool_t
  2353 fast_path_fill (pixman_implementation_t *imp,
  2354                 uint32_t *               bits,
  2355                 int                      stride,
  2356                 int                      bpp,
  2357                 int                      x,
  2358                 int                      y,
  2359                 int                      width,
  2360                 int                      height,
  2361                 uint32_t		 filler)
  2362 {
  2363     switch (bpp)
  2364     {
  2365     case 1:
  2366 	pixman_fill1 (bits, stride, x, y, width, height, filler);
  2367 	break;
  2369     case 8:
  2370 	pixman_fill8 (bits, stride, x, y, width, height, filler);
  2371 	break;
  2373     case 16:
  2374 	pixman_fill16 (bits, stride, x, y, width, height, filler);
  2375 	break;
  2377     case 32:
  2378 	pixman_fill32 (bits, stride, x, y, width, height, filler);
  2379 	break;
  2381     default:
  2382 	return FALSE;
  2383     }
  2385     return TRUE;
  2386 }
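/*
 * Only the common 1/8/16/32 bpp cases are handled here; returning FALSE for
 * anything else (e.g. 24 bpp) presumably lets the caller fall back to the
 * implementation this one is layered on top of.
 */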
  2388 /*****************************************************************************/
  2390 static uint32_t *
  2391 fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
  2392 {
  2393     int32_t w = iter->width;
  2394     uint32_t *dst = iter->buffer;
  2395     const uint16_t *src = (const uint16_t *)iter->bits;
  2397     iter->bits += iter->stride;
  2399     /* Align the source buffer at 4 bytes boundary */
  2400     if (w > 0 && ((uintptr_t)src & 3))
  2401     {
  2402 	*dst++ = convert_0565_to_8888 (*src++);
  2403 	w--;
  2404     }
  2405     /* Process two pixels per iteration */
  2406     while ((w -= 2) >= 0)
  2407     {
  2408 	uint32_t sr, sb, sg, t0, t1;
  2409 	uint32_t s = *(const uint32_t *)src;
  2410 	src += 2;
  2411 	sr = (s >> 8) & 0x00F800F8;
  2412 	sb = (s << 3) & 0x00F800F8;
  2413 	sg = (s >> 3) & 0x00FC00FC;
  2414 	sr |= sr >> 5;
  2415 	sb |= sb >> 5;
  2416 	sg |= sg >> 6;
  2417 	t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) |
  2418 	     (sb & 0xFF) | 0xFF000000;
  2419 	t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) |
  2420 	     (sb >> 16) | 0xFF000000;
  2421 #ifdef WORDS_BIGENDIAN
  2422 	*dst++ = t1;
  2423 	*dst++ = t0;
  2424 #else
  2425 	*dst++ = t0;
  2426 	*dst++ = t1;
  2427 #endif
  2428     }
  2429     if (w & 1)
  2430     {
  2431 	*dst = convert_0565_to_8888 (*src);
  2432     }
  2434     return iter->buffer;
  2435 }
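/*
 * The loop above converts two r5g6b5 pixels per iteration: one 32-bit load
 * fetches both pixels, the 5/6/5 fields are isolated with the pair-wise
 * masks 0x00F800F8 and 0x00FC00FC, widened to 8 bits by the `sr |= sr >> 5'
 * style merges, and finally scattered into two a8r8g8b8 words with the
 * alpha byte forced to 0xFF.
 */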
  2437 static uint32_t *
  2438 fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask)
  2439 {
  2440     iter->bits += iter->stride;
  2441     return iter->buffer;
  2442 }
  2444 /* Helper function for a workaround, which tries to ensure that 0x1F001F
  2445  * constant is always allocated in a register on RISC architectures.
  2446  */
  2447 static force_inline uint32_t
  2448 convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F)
  2449 {
  2450     uint32_t a, b;
  2451     a = (s >> 3) & x1F001F;
  2452     b = s & 0xFC00;
  2453     a |= a >> 5;
  2454     a |= b >> 5;
  2455     return a;
  2456 }
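/*
 * Worked example: for s = 0xff345678 the masked red/blue pair is
 * a = 0x0006000f and the green field is b = 0x5400; after the two `>> 5'
 * merges a becomes 0x000632af, and the 16-bit store in
 * fast_write_back_r5g6b5 () keeps the low half, 0x32af, which is r5g6b5
 * for (0x34, 0x56, 0x78).
 */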
  2458 static void
  2459 fast_write_back_r5g6b5 (pixman_iter_t *iter)
  2460 {
  2461     int32_t w = iter->width;
  2462     uint16_t *dst = (uint16_t *)(iter->bits - iter->stride);
  2463     const uint32_t *src = iter->buffer;
  2464     /* Workaround to ensure that x1F001F variable is allocated in a register */
  2465     static volatile uint32_t volatile_x1F001F = 0x1F001F;
  2466     uint32_t x1F001F = volatile_x1F001F;
  2468     while ((w -= 4) >= 0)
  2469     {
  2470 	uint32_t s1 = *src++;
  2471 	uint32_t s2 = *src++;
  2472 	uint32_t s3 = *src++;
  2473 	uint32_t s4 = *src++;
  2474 	*dst++ = convert_8888_to_0565_workaround (s1, x1F001F);
  2475 	*dst++ = convert_8888_to_0565_workaround (s2, x1F001F);
  2476 	*dst++ = convert_8888_to_0565_workaround (s3, x1F001F);
  2477 	*dst++ = convert_8888_to_0565_workaround (s4, x1F001F);
  2478     }
  2479     if (w & 2)
  2480     {
  2481 	*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
  2482 	*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
  2483     }
  2484     if (w & 1)
  2485     {
  2486 	*dst = convert_8888_to_0565_workaround (*src, x1F001F);
  2487     }
  2488 }
  2490 typedef struct
  2491 {
  2492     pixman_format_code_t	format;
  2493     pixman_iter_get_scanline_t	get_scanline;
  2494     pixman_iter_write_back_t	write_back;
  2495 } fetcher_info_t;
  2497 static const fetcher_info_t fetchers[] =
  2498 {
  2499     { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
  2500     { PIXMAN_null }
  2501 };
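/*
 * fetchers[] maps an image format to the scanline fetch and write-back
 * routines used by the iterator initializers below; currently only
 * PIXMAN_r5g6b5 is special-cased, and any other format is left to the
 * generic code.
 */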
  2503 static pixman_bool_t
  2504 fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
  2505 {
  2506     pixman_image_t *image = iter->image;
  2508 #define FLAGS								\
  2509     (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |		\
  2510      FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
  2512     if (iter->iter_flags & ITER_16)
  2513 	    return FALSE;
  2515     if ((iter->iter_flags & ITER_NARROW)			&&
  2516 	(iter->image_flags & FLAGS) == FLAGS)
  2517     {
  2518 	const fetcher_info_t *f;
  2520 	for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
  2521 	{
  2522 	    if (image->common.extended_format_code == f->format)
  2523 	    {
  2524 		uint8_t *b = (uint8_t *)image->bits.bits;
  2525 		int s = image->bits.rowstride * 4;
  2527 		iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
  2528 		iter->stride = s;
  2530 		iter->get_scanline = f->get_scanline;
  2531 		return TRUE;
  2532 	    }
  2533 	}
  2534     }
  2536     return FALSE;
  2537 }
  2539 static pixman_bool_t
  2540 fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
  2541 {
  2542     pixman_image_t *image = iter->image;
  2544     if (iter->iter_flags & ITER_16)
  2545 	    return FALSE;
  2547     if ((iter->iter_flags & ITER_NARROW)		&&
  2548 	(iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS)
  2549     {
  2550 	const fetcher_info_t *f;
  2552 	for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
  2553 	{
  2554 	    if (image->common.extended_format_code == f->format)
  2555 	    {
  2556 		uint8_t *b = (uint8_t *)image->bits.bits;
  2557 		int s = image->bits.rowstride * 4;
  2559 		iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
  2560 		iter->stride = s;
  2562 		if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
  2563 		    (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
  2564 		{
  2565 		    iter->get_scanline = fast_dest_fetch_noop;
  2566 		}
  2567 		else
  2568 		{
  2569 		    iter->get_scanline = f->get_scanline;
  2570 		}
  2571 		iter->write_back = f->write_back;
  2572 		return TRUE;
  2573 	    }
  2574 	}
  2575     }
  2576     return FALSE;
  2577 }
  2580 pixman_implementation_t *
  2581 _pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
  2582 {
  2583     pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
  2585     imp->fill = fast_path_fill;
  2586     imp->src_iter_init = fast_src_iter_init;
  2587     imp->dest_iter_init = fast_dest_iter_init;
  2589     return imp;
  2590 }
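/*
 * This constructor layers the C fast paths on top of `fallback' (normally
 * the general implementation), registering c_fast_paths and overriding the
 * fill and iterator-init hooks; whatever these routines decline is
 * delegated back to the fallback.
 */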
