gfx/cairo/libpixman/src/pixman-inlines.h

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
     2 /*
     3  * Copyright © 2000 SuSE, Inc.
     4  * Copyright © 2007 Red Hat, Inc.
     5  *
     6  * Permission to use, copy, modify, distribute, and sell this software and its
     7  * documentation for any purpose is hereby granted without fee, provided that
     8  * the above copyright notice appear in all copies and that both that
     9  * copyright notice and this permission notice appear in supporting
    10  * documentation, and that the name of SuSE not be used in advertising or
    11  * publicity pertaining to distribution of the software without specific,
    12  * written prior permission.  SuSE makes no representations about the
    13  * suitability of this software for any purpose.  It is provided "as is"
    14  * without express or implied warranty.
    15  *
    16  * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
    17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
    18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
    20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
    21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
    22  *
    23  * Author:  Keith Packard, SuSE, Inc.
    24  */
    26 #ifndef PIXMAN_FAST_PATH_H__
    27 #define PIXMAN_FAST_PATH_H__
    29 #include "pixman-private.h"
    /* Extra repeat-mode constant so that COVER can be given as 'repeat_mode'
     * to the templates below, which paste it into PIXMAN_REPEAT_ ## repeat_mode.
     * There it is only compared against NORMAL, PAD and NONE, so COVER ends up
     * in the branch that does no repeat handling at all.
     * NOTE(review): -1 is presumably chosen so that it cannot collide with a
     * real pixman_repeat_t value -- confirm against pixman.h. */
    31 #define PIXMAN_REPEAT_COVER -1
    33 /* Flags describing input parameters to fast path macro template.
    34  * Turning on some flag values may indicate that
    35  * "some property X is available so template can use this" or
    36  * "some property X should be handled by template".
    37  *
    38  * FLAG_HAVE_SOLID_MASK
    39  *  Input mask is solid so template should handle this.
    40  *
    41  * FLAG_HAVE_NON_SOLID_MASK
    42  *  Input mask is bits mask so template should handle this.
    43  *
    44  * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually
    45  * exclusive (turning both flags on at the same time is not allowed).
    46  */
    47 #define FLAG_NONE				(0)		/* no flag set */
    48 #define FLAG_HAVE_SOLID_MASK			(1 <<   1)	/* bit 1 */
    49 #define FLAG_HAVE_NON_SOLID_MASK		(1 <<   2)	/* bit 2 */
    51 /* Source scanlines narrower than the constant below are extended, so that
    52  * the scanline function is not called repeatedly for very short runs.
    53  */
    54 #define REPEAT_NORMAL_MIN_WIDTH			64
    56 static force_inline pixman_bool_t
    57 repeat (pixman_repeat_t repeat, int *c, int size)
    58 {
    59     if (repeat == PIXMAN_REPEAT_NONE)
    60     {
    61 	if (*c < 0 || *c >= size)
    62 	    return FALSE;
    63     }
    64     else if (repeat == PIXMAN_REPEAT_NORMAL)
    65     {
    66 	while (*c >= size)
    67 	    *c -= size;
    68 	while (*c < 0)
    69 	    *c += size;
    70     }
    71     else if (repeat == PIXMAN_REPEAT_PAD)
    72     {
    73 	*c = CLIP (*c, 0, size - 1);
    74     }
    75     else /* REFLECT */
    76     {
    77 	*c = MOD (*c, size * 2);
    78 	if (*c >= size)
    79 	    *c = size * 2 - *c - 1;
    80     }
    81     return TRUE;
    82 }
    84 static force_inline int
    85 pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
    86 {
    87     return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
    88 	   ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
    89 }
    91 #if BILINEAR_INTERPOLATION_BITS <= 4
    92 /* Inspired by Filter_32_opaque from Skia */
    93 static force_inline uint32_t
    94 bilinear_interpolation (uint32_t tl, uint32_t tr,
    95 			uint32_t bl, uint32_t br,
    96 			int distx, int disty)
    97 {
    98     int distxy, distxiy, distixy, distixiy;
    99     uint32_t lo, hi;
   101     distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
   102     disty <<= (4 - BILINEAR_INTERPOLATION_BITS);
   104     distxy = distx * disty;
   105     distxiy = (distx << 4) - distxy;	/* distx * (16 - disty) */
   106     distixy = (disty << 4) - distxy;	/* disty * (16 - distx) */
   107     distixiy =
   108 	16 * 16 - (disty << 4) -
   109 	(distx << 4) + distxy; /* (16 - distx) * (16 - disty) */
   111     lo = (tl & 0xff00ff) * distixiy;
   112     hi = ((tl >> 8) & 0xff00ff) * distixiy;
   114     lo += (tr & 0xff00ff) * distxiy;
   115     hi += ((tr >> 8) & 0xff00ff) * distxiy;
   117     lo += (bl & 0xff00ff) * distixy;
   118     hi += ((bl >> 8) & 0xff00ff) * distixy;
   120     lo += (br & 0xff00ff) * distxy;
   121     hi += ((br >> 8) & 0xff00ff) * distxy;
   123     return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff);
   124 }
   126 #else
   127 #if SIZEOF_LONG > 4
   129 static force_inline uint32_t
   130 bilinear_interpolation (uint32_t tl, uint32_t tr,
   131 			uint32_t bl, uint32_t br,
   132 			int distx, int disty)
   133 {
   134     uint64_t distxy, distxiy, distixy, distixiy;
   135     uint64_t tl64, tr64, bl64, br64;
   136     uint64_t f, r;
   138     distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
   139     disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
   141     distxy = distx * disty;
   142     distxiy = distx * (256 - disty);
   143     distixy = (256 - distx) * disty;
   144     distixiy = (256 - distx) * (256 - disty);
   146     /* Alpha and Blue */
   147     tl64 = tl & 0xff0000ff;
   148     tr64 = tr & 0xff0000ff;
   149     bl64 = bl & 0xff0000ff;
   150     br64 = br & 0xff0000ff;
   152     f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
   153     r = f & 0x0000ff0000ff0000ull;
   155     /* Red and Green */
   156     tl64 = tl;
   157     tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
   159     tr64 = tr;
   160     tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
   162     bl64 = bl;
   163     bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
   165     br64 = br;
   166     br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
   168     f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
   169     r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
   171     return (uint32_t)(r >> 16);
   172 }
   174 #else
   176 #ifdef LOW_QUALITY_INTERPOLATION
   177 /* Based on Filter_32_opaque_portable from Skia */
   178 static force_inline uint32_t
   179 bilinear_interpolation(uint32_t a00, uint32_t a01,
   180 		       uint32_t a10, uint32_t a11,
   181 		       int x, int y)
   182 {
   183     int xy = x * y;
   184     static const uint32_t mask = 0xff00ff;
   186     int scale = 256 - 16*y - 16*x + xy;
   187     uint32_t lo = (a00 & mask) * scale;
   188     uint32_t hi = ((a00 >> 8) & mask) * scale;
   190     scale = 16*x - xy;
   191     lo += (a01 & mask) * scale;
   192     hi += ((a01 >> 8) & mask) * scale;
   194     scale = 16*y - xy;
   195     lo += (a10 & mask) * scale;
   196     hi += ((a10 >> 8) & mask) * scale;
   198     lo += (a11 & mask) * xy;
   199     hi += ((a11 >> 8) & mask) * xy;
   201     return ((lo >> 8) & mask) | (hi & ~mask);
   202 }
   203 #else
   204 static force_inline uint32_t
   205 bilinear_interpolation (uint32_t tl, uint32_t tr,
   206 			uint32_t bl, uint32_t br,
   207 			int distx, int disty)
   208 {
   209     int distxy, distxiy, distixy, distixiy;
   210     uint32_t f, r;
   212     distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
   213     disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
   215     distxy = distx * disty;
   216     distxiy = (distx << 8) - distxy;	/* distx * (256 - disty) */
   217     distixy = (disty << 8) - distxy;	/* disty * (256 - distx) */
   218     distixiy =
   219 	256 * 256 - (disty << 8) -
   220 	(distx << 8) + distxy;		/* (256 - distx) * (256 - disty) */
   222     /* Blue */
   223     r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
   224       + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
   226     /* Green */
   227     f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
   228       + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
   229     r |= f & 0xff000000;
   231     tl >>= 16;
   232     tr >>= 16;
   233     bl >>= 16;
   234     br >>= 16;
   235     r >>= 16;
   237     /* Red */
   238     f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
   239       + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
   240     r |= f & 0x00ff0000;
   242     /* Alpha */
   243     f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
   244       + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
   245     r |= f & 0xff000000;
   247     return r;
   248 }
   249 #endif
   250 #endif
   251 #endif // BILINEAR_INTERPOLATION_BITS <= 4
   253 /*
   254  * For each scanline fetched from source image with PAD repeat:
   255  * - calculate how many pixels need to be padded on the left side
   256  * - calculate how many pixels need to be padded on the right side
   257  * - update width to only count pixels which are fetched from the image
   258  * All this information is returned via 'width', 'left_pad', 'right_pad'
   259  * arguments. The code is assuming that 'unit_x' is positive.
   260  *
   261  * Note: 64-bit math is used in order to avoid potential overflows, which
   262  *       is probably excessive in many cases. This particular function
   263  *       may need its own correctness test and performance tuning.
   264  */
   265 static force_inline void
   266 pad_repeat_get_scanline_bounds (int32_t         source_image_width,
   267 				pixman_fixed_t  vx,
   268 				pixman_fixed_t  unit_x,
   269 				int32_t *       width,
   270 				int32_t *       left_pad,
   271 				int32_t *       right_pad)
   272 {
   273     int64_t max_vx = (int64_t) source_image_width << 16;
   274     int64_t tmp;
   275     if (vx < 0)
   276     {
   277 	tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
   278 	if (tmp > *width)
   279 	{
   280 	    *left_pad = *width;
   281 	    *width = 0;
   282 	}
   283 	else
   284 	{
   285 	    *left_pad = (int32_t) tmp;
   286 	    *width -= (int32_t) tmp;
   287 	}
   288     }
   289     else
   290     {
   291 	*left_pad = 0;
   292     }
   293     tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
   294     if (tmp < 0)
   295     {
   296 	*right_pad = *width;
   297 	*width = 0;
   298     }
   299     else if (tmp >= *width)
   300     {
   301 	*right_pad = 0;
   302     }
   303     else
   304     {
   305 	*right_pad = *width - (int32_t) tmp;
   306 	*width = (int32_t) tmp;
   307     }
   308 }
   310 /* A macroified version of specialized nearest scalers for some
   311  * common 8888 and 565 formats. It supports SRC and OVER ops.
   312  *
   313  * There are two repeat versions, one that handles repeat normal,
   314  * and one without repeat handling that only works if the src region
   315  * used is completely covered by the pre-repeated source samples.
   316  *
   317  * The loops are unrolled to process two pixels per iteration for better
   318  * performance on most CPU architectures (superscalar processors
   319  * can issue several operations simultaneously, other processors can hide
   320  * instruction latencies by pipelining operations). Unrolling more
   321  * does not make much sense because the compiler will start running out
   322  * of spare registers soon.
   323  */
   /* Alpha accessors.  FAST_NEAREST_SCANLINE selects one of them by pasting
    * the source format name into GET_ ## SRC_FORMAT ## _ALPHA.  The 8888
    * variant shifts the pixel right by 24 bits (the top byte of a 32-bit
    * pixel); the 0565 and x888 variants always evaluate to 0xff. */
   325 #define GET_8888_ALPHA(s) ((s) >> 24)
   326  /* This is not actually used since we don't have an OVER with
   327     565 source, but it is needed to build. */
   328 #define GET_0565_ALPHA(s) 0xff
   329 #define GET_x888_ALPHA(s) 0xff
   /*
    * FAST_NEAREST_SCANLINE: defines a static force_inline function called
    * 'scanline_func_name' that writes 'w' destination pixels.  For each one
    * it reads the source pixel at index pixman_fixed_to_int (vx) and then
    * advances vx by unit_x.
    *
    * SRC_FORMAT and DST_FORMAT are pasted into the names of the
    * convert_*_to_* helpers and of the GET_*_ALPHA accessor.  OP must be SRC
    * or OVER; any other operator reaches abort ().
    *
    * For OVER:
    *   - if fully_transparent_src is set the function returns immediately;
    *   - a source pixel whose alpha is 0xff is converted and stored directly;
    *   - a source pixel that is entirely zero leaves the destination alone;
    *   - anything else is blended with UN8x4_MUL_UN8_ADD_UN8x4 using the
    *     inverted source alpha (a ^ 0xff).
    *
    * For repeat_mode NORMAL, vx is wrapped after every step by subtracting
    * src_width_fixed for as long as it is >= 0.  The function is therefore
    * meant to be called with a negative vx and a 'src' pointer biased to
    * match, the way FAST_NEAREST_MAINLOOP_INT passes src + width together
    * with vx - src_width_fixed.
    *
    * Pixels are processed two per loop iteration; the trailing 'w & 1' block
    * handles the last pixel of an odd-width run.
    */
   331 #define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT,			\
   332 			      src_type_t, dst_type_t, OP, repeat_mode)				\
   333 static force_inline void									\
   334 scanline_func_name (dst_type_t       *dst,							\
   335 		    const src_type_t *src,							\
   336 		    int32_t           w,							\
   337 		    pixman_fixed_t    vx,							\
   338 		    pixman_fixed_t    unit_x,							\
   339 		    pixman_fixed_t    src_width_fixed,						\
   340 		    pixman_bool_t     fully_transparent_src)					\
   341 {												\
   342 	uint32_t   d;										\
   343 	src_type_t s1, s2;									\
   344 	uint8_t    a1, a2;									\
   345 	int        x1, x2;									\
   346 												\
   347 	if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src)			\
   348 	    return;										\
   349 												\
   350 	if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER)		\
   351 	    abort();										\
   352 												\
   353 	while ((w -= 2) >= 0)									\
   354 	{											\
   355 	    x1 = pixman_fixed_to_int (vx);							\
   356 	    vx += unit_x;									\
   357 	    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
   358 	    {											\
   359 		/* This works because we know that unit_x is positive */			\
   360 		while (vx >= 0)									\
   361 		    vx -= src_width_fixed;							\
   362 	    }											\
   363 	    s1 = *(src + x1);									\
   364 												\
   365 	    x2 = pixman_fixed_to_int (vx);							\
   366 	    vx += unit_x;									\
   367 	    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
   368 	    {											\
   369 		/* This works because we know that unit_x is positive */			\
   370 		while (vx >= 0)									\
   371 		    vx -= src_width_fixed;							\
   372 	    }											\
   373 	    s2 = *(src + x2);									\
   374 												\
   375 	    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)						\
   376 	    {											\
   377 		a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);						\
   378 		a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2);						\
   379 												\
   380 		if (a1 == 0xff)									\
   381 		{										\
   382 		    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
   383 		}										\
   384 		else if (s1)									\
   385 		{										\
   386 		    d = convert_ ## DST_FORMAT ## _to_8888 (*dst);				\
   387 		    s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1);				\
   388 		    a1 ^= 0xff;									\
   389 		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);					\
   390 		    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
   391 		}										\
   392 		dst++;										\
   393 												\
   394 		if (a2 == 0xff)									\
   395 		{										\
   396 		    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2);			\
   397 		}										\
   398 		else if (s2)									\
   399 		{										\
   400 		    d = convert_## DST_FORMAT ## _to_8888 (*dst);				\
   401 		    s2 = convert_## SRC_FORMAT ## _to_8888 (s2);				\
   402 		    a2 ^= 0xff;									\
   403 		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2);					\
   404 		    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
   405 		}										\
   406 		dst++;										\
   407 	    }											\
   408 	    else /* PIXMAN_OP_SRC */								\
   409 	    {											\
   410 		*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
   411 		*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2);			\
   412 	    }											\
   413 	}											\
   414 												\
   415 	if (w & 1)										\
   416 	{											\
   417 	    x1 = pixman_fixed_to_int (vx);							\
   418 	    s1 = *(src + x1);									\
   419 												\
   420 	    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)						\
   421 	    {											\
   422 		a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);						\
   423 												\
   424 		if (a1 == 0xff)									\
   425 		{										\
   426 		    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
   427 		}										\
   428 		else if (s1)									\
   429 		{										\
   430 		    d = convert_## DST_FORMAT ## _to_8888 (*dst);				\
   431 		    s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1);				\
   432 		    a1 ^= 0xff;									\
   433 		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);					\
   434 		    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
   435 		}										\
   436 		dst++;										\
   437 	    }											\
   438 	    else /* PIXMAN_OP_SRC */								\
   439 	    {											\
   440 		*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
   441 	    }											\
   442 	}											\
   443 }
   /*
    * FAST_NEAREST_MAINLOOP_INT: defines the composite entry point
    * fast_composite_scaled_nearest ## scale_func_name.  It walks the
    * destination rows top to bottom and, for each row, calls
    *
    *     scanline_func (mask, dst, src, w, vx, unit_x, src_width_fixed,
    *                    fully_transparent_src)
    *
    * one or more times.
    *
    * Start position: the centre of pixel (src_x, src_y) is pushed through the
    * source image transform and pixman_fixed_e is subtracted from both
    * coordinates.  unit_x / unit_y are read from matrix[0][0] / matrix[1][1]
    * of that transform.  If the transform of the point fails the function
    * returns without drawing.
    *
    * repeat_mode selects how each row is handled:
    *   NORMAL - vx and vy are wrapped into the image with repeat ();
    *   PAD    - the row index is clamped into the image and the row is split
    *            into left_pad / width / right_pad runs; both pad runs are
    *            drawn with unit_x == 0 and vx == -pixman_fixed_e from
    *            src + 1 and src + width respectively (presumably resolving
    *            to the first and last pixel of the row);
    *   NONE   - same three-way split, but the pad runs, and whole rows that
    *            lie outside the image, are drawn from a single static zero
    *            pixel with fully_transparent_src == TRUE;
    *   other  - (COVER) the whole row is passed on in one call.
    *
    * Calls that read real image data pass 'src + width' together with
    * 'vx - src_width_fixed', i.e. the source pointer is moved to the end of
    * the row and vx is made negative by the same amount.
    *
    * Mask handling: with have_mask set, the mask is either fetched once as a
    * solid value (mask_is_solid) or advanced by one mask line per row.
    * Without have_mask, 'mask' points at the uninitialised local solid_mask,
    * so the scanline function is expected not to read it.
    */
   445 #define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
   446 				  dst_type_t, repeat_mode, have_mask, mask_is_solid)		\
   447 static void											\
   448 fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,		\
   449 						   pixman_composite_info_t *info)               \
   450 {												\
   451     PIXMAN_COMPOSITE_ARGS (info);					                        \
   452     dst_type_t *dst_line;						                        \
   453     mask_type_t *mask_line;									\
   454     src_type_t *src_first_line;									\
   455     int       y;										\
   456     pixman_fixed_t src_width_fixed = pixman_int_to_fixed (src_image->bits.width);		\
   457     pixman_fixed_t max_vy;									\
   458     pixman_vector_t v;										\
   459     pixman_fixed_t vx, vy;									\
   460     pixman_fixed_t unit_x, unit_y;								\
   461     int32_t left_pad, right_pad;								\
   462 												\
   463     src_type_t *src;										\
   464     dst_type_t *dst;										\
   465     mask_type_t solid_mask;									\
   466     const mask_type_t *mask = &solid_mask;							\
   467     int src_stride, mask_stride, dst_stride;							\
   468 												\
   469     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);	\
   470     if (have_mask)										\
   471     {												\
   472 	if (mask_is_solid)									\
   473 	    solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);	\
   474 	else											\
   475 	    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,			\
   476 				   mask_stride, mask_line, 1);					\
   477     }												\
   478     /* pass in 0 instead of src_x and src_y because src_x and src_y need to be			\
   479      * transformed from destination space to source space */					\
   480     PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);		\
   481 												\
   482     /* reference point is the center of the pixel */						\
   483     v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;				\
   484     v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;				\
   485     v.vector[2] = pixman_fixed_1;								\
   486 												\
   487     if (!pixman_transform_point_3d (src_image->common.transform, &v))				\
   488 	return;											\
   489 												\
   490     unit_x = src_image->common.transform->matrix[0][0];						\
   491     unit_y = src_image->common.transform->matrix[1][1];						\
   492 												\
   493     /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */			\
   494     v.vector[0] -= pixman_fixed_e;								\
   495     v.vector[1] -= pixman_fixed_e;								\
   496 												\
   497     vx = v.vector[0];										\
   498     vy = v.vector[1];										\
   499 												\
   500     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)					\
   501     {												\
   502 	max_vy = pixman_int_to_fixed (src_image->bits.height);					\
   503 												\
   504 	/* Clamp repeating positions inside the actual samples */				\
   505 	repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);					\
   506 	repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);						\
   507     }												\
   508 												\
   509     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||					\
   510 	PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)					\
   511     {												\
   512 	pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x,			\
   513 					&width, &left_pad, &right_pad);				\
   514 	vx += left_pad * unit_x;								\
   515     }												\
   516 												\
   517     while (--height >= 0)									\
   518     {												\
   519 	dst = dst_line;										\
   520 	dst_line += dst_stride;									\
   521 	if (have_mask && !mask_is_solid)							\
   522 	{											\
   523 	    mask = mask_line;									\
   524 	    mask_line += mask_stride;								\
   525 	}											\
   526 												\
   527 	y = pixman_fixed_to_int (vy);								\
   528 	vy += unit_y;										\
   529 	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
   530 	    repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);						\
   531 	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
   532 	{											\
   533 	    repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height);				\
   534 	    src = src_first_line + src_stride * y;						\
   535 	    if (left_pad > 0)									\
   536 	    {											\
   537 		scanline_func (mask, dst,							\
   538 			       src + src_image->bits.width - src_image->bits.width + 1,		\
   539 			       left_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE);		\
   540 	    }											\
   541 	    if (width > 0)									\
   542 	    {											\
   543 		scanline_func (mask + (mask_is_solid ? 0 : left_pad),				\
   544 			       dst + left_pad, src + src_image->bits.width, width,		\
   545 			       vx - src_width_fixed, unit_x, src_width_fixed, FALSE);		\
   546 	    }											\
   547 	    if (right_pad > 0)									\
   548 	    {											\
   549 		scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),			\
   550 			       dst + left_pad + width, src + src_image->bits.width,		\
   551 			       right_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE);		\
   552 	    }											\
   553 	}											\
   554 	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)				\
   555 	{											\
   556 	    static const src_type_t zero[1] = { 0 };						\
   557 	    if (y < 0 || y >= src_image->bits.height)						\
   558 	    {											\
   559 		scanline_func (mask, dst, zero + 1, left_pad + width + right_pad,		\
   560 			       -pixman_fixed_e, 0, src_width_fixed, TRUE);			\
   561 		continue;									\
   562 	    }											\
   563 	    src = src_first_line + src_stride * y;						\
   564 	    if (left_pad > 0)									\
   565 	    {											\
   566 		scanline_func (mask, dst, zero + 1, left_pad,					\
   567 			       -pixman_fixed_e, 0, src_width_fixed, TRUE);			\
   568 	    }											\
   569 	    if (width > 0)									\
   570 	    {											\
   571 		scanline_func (mask + (mask_is_solid ? 0 : left_pad),				\
   572 			       dst + left_pad, src + src_image->bits.width, width,		\
   573 			       vx - src_width_fixed, unit_x, src_width_fixed, FALSE);		\
   574 	    }											\
   575 	    if (right_pad > 0)									\
   576 	    {											\
   577 		scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),			\
   578 			       dst + left_pad + width, zero + 1, right_pad,			\
   579 			       -pixman_fixed_e, 0, src_width_fixed, TRUE);			\
   580 	    }											\
   581 	}											\
   582 	else											\
   583 	{											\
   584 	    src = src_first_line + src_stride * y;						\
   585 	    scanline_func (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed,	\
   586 			   unit_x, src_width_fixed, FALSE);					\
   587 	}											\
   588     }												\
   589 }
   591 /* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
   /* Forwards every argument to FAST_NEAREST_MAINLOOP_INT unchanged, except
    * that an underscore is pasted in front of scale_func_name, so the
    * generated function is fast_composite_scaled_nearest_<scale_func_name>. */
   592 #define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
   593 				  dst_type_t, repeat_mode, have_mask, mask_is_solid)		\
   594 	FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,	\
   595 				  dst_type_t, repeat_mode, have_mask, mask_is_solid)
   /*
    * FAST_NEAREST_MAINLOOP_NOMASK: main loop for scanline functions that take
    * no mask argument.  It first defines an adapter named
    * <scanline_func><scale_func_name>_wrapper which accepts the mask pointer
    * expected by FAST_NEAREST_MAINLOOP_INT, ignores it, and forwards the
    * remaining arguments to 'scanline_func'.  It then instantiates
    * FAST_NEAREST_MAINLOOP_INT with that adapter, uint8_t as the mask type and
    * both have_mask and mask_is_solid set to FALSE.
    */
   597 #define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t,	\
   598 			      repeat_mode)							\
   599     static force_inline void									\
   600     scanline_func##scale_func_name##_wrapper (							\
   601 		    const uint8_t    *mask,							\
   602 		    dst_type_t       *dst,							\
   603 		    const src_type_t *src,							\
   604 		    int32_t          w,								\
   605 		    pixman_fixed_t   vx,							\
   606 		    pixman_fixed_t   unit_x,							\
   607 		    pixman_fixed_t   max_vx,							\
   608 		    pixman_bool_t    fully_transparent_src)					\
   609     {												\
   610 	scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src);			\
   611     }												\
   612     FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper,	\
   613 			       src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
   /* Convenience form of FAST_NEAREST_MAINLOOP_NOMASK that pastes an
    * underscore in front of scale_func_name before forwarding. */
   615 #define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t,		\
   616 			      repeat_mode)							\
   617 	FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t,		\
   618 			      dst_type_t, repeat_mode)
   /*
    * FAST_NEAREST: instantiates a complete unmasked nearest-neighbour scaler.
    * It expands to
    *   - FAST_NEAREST_SCANLINE, producing the scanline function
    *     scaled_nearest_scanline_<scale_func_name>_<OP>, and
    *   - FAST_NEAREST_MAINLOOP_NOMASK, producing the entry point
    *     fast_composite_scaled_nearest_<scale_func_name>_<OP> that drives it.
    */
   620 #define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT,				\
   621 		     src_type_t, dst_type_t, OP, repeat_mode)				\
   622     FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,	\
   623 			  SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t,		\
   624 			  OP, repeat_mode)						\
   625     FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP,			\
   626 			  scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,	\
   627 			  src_type_t, dst_type_t, repeat_mode)
   /* Flag set shared by every SIMPLE_NEAREST_* table entry below; each entry
    * ORs it with a repeat-specific flag (or with
    * FAST_PATH_SAMPLES_COVER_CLIP_NEAREST for the COVER entries).
    * NOTE(review): the third field of an entry is presumably the source image
    * flags -- confirm against the fast path table type. */
   630 #define SCALED_NEAREST_FLAGS						\
   631     (FAST_PATH_SCALE_TRANSFORM	|					\
   632      FAST_PATH_NO_ALPHA_MAP	|					\
   633      FAST_PATH_NEAREST_FILTER	|					\
   634      FAST_PATH_NO_ACCESSORS	|					\
   635      FAST_PATH_NARROW_FORMAT)
   /*
    * Table-entry initialisers for unmasked nearest scaling (mask format
    * PIXMAN_null, mask flags 0), one per repeat mode.  Each names the function
    * fast_composite_scaled_nearest_<func>_<mode>_<op>, where <mode> is
    * normal, pad, none or cover.  The NORMAL, PAD and NONE entries also
    * require FAST_PATH_X_UNIT_POSITIVE; the COVER entry instead requires
    * FAST_PATH_SAMPLES_COVER_CLIP_NEAREST.
    * NOTE(review): the fields appear to be, in order: operator, source
    * format, source flags, mask format, mask flags, destination format,
    * destination flags, function -- confirm against the table's struct type.
    */
   637 #define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func)			\
   638     {   PIXMAN_OP_ ## op,						\
   639 	PIXMAN_ ## s,							\
   640 	(SCALED_NEAREST_FLAGS		|				\
   641 	 FAST_PATH_NORMAL_REPEAT	|				\
   642 	 FAST_PATH_X_UNIT_POSITIVE),					\
   643 	PIXMAN_null, 0,							\
   644 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   645 	fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,	\
   646     }
   648 #define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func)			\
   649     {   PIXMAN_OP_ ## op,						\
   650 	PIXMAN_ ## s,							\
   651 	(SCALED_NEAREST_FLAGS		|				\
   652 	 FAST_PATH_PAD_REPEAT		|				\
   653 	 FAST_PATH_X_UNIT_POSITIVE),					\
   654 	PIXMAN_null, 0,							\
   655 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   656 	fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,	\
   657     }
   659 #define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func)			\
   660     {   PIXMAN_OP_ ## op,						\
   661 	PIXMAN_ ## s,							\
   662 	(SCALED_NEAREST_FLAGS		|				\
   663 	 FAST_PATH_NONE_REPEAT		|				\
   664 	 FAST_PATH_X_UNIT_POSITIVE),					\
   665 	PIXMAN_null, 0,							\
   666 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   667 	fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,	\
   668     }
   670 #define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func)			\
   671     {   PIXMAN_OP_ ## op,						\
   672 	PIXMAN_ ## s,							\
   673 	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,    \
   674 	PIXMAN_null, 0,							\
   675 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   676 	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
   677     }
   /*
    * Table-entry initialisers for nearest scaling with an a8 mask
    * (mask format PIXMAN_a8, mask flags MASK_FLAGS (a8,
    * FAST_PATH_UNIFIED_ALPHA)), one per repeat mode.  Apart from the mask
    * fields they mirror the unmasked SIMPLE_NEAREST_FAST_PATH_* entries and
    * name the same fast_composite_scaled_nearest_<func>_<mode>_<op> pattern.
    */
   679 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
   680     {   PIXMAN_OP_ ## op,						\
   681 	PIXMAN_ ## s,							\
   682 	(SCALED_NEAREST_FLAGS		|				\
   683 	 FAST_PATH_NORMAL_REPEAT	|				\
   684 	 FAST_PATH_X_UNIT_POSITIVE),					\
   685 	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
   686 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   687 	fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,	\
   688     }
   690 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func)		\
   691     {   PIXMAN_OP_ ## op,						\
   692 	PIXMAN_ ## s,							\
   693 	(SCALED_NEAREST_FLAGS		|				\
   694 	 FAST_PATH_PAD_REPEAT		|				\
   695 	 FAST_PATH_X_UNIT_POSITIVE),					\
   696 	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
   697 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   698 	fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,	\
   699     }
   701 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func)		\
   702     {   PIXMAN_OP_ ## op,						\
   703 	PIXMAN_ ## s,							\
   704 	(SCALED_NEAREST_FLAGS		|				\
   705 	 FAST_PATH_NONE_REPEAT		|				\
   706 	 FAST_PATH_X_UNIT_POSITIVE),					\
   707 	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
   708 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   709 	fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,	\
   710     }
   712 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func)		\
   713     {   PIXMAN_OP_ ## op,						\
   714 	PIXMAN_ ## s,							\
   715 	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,	\
   716 	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
   717 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   718 	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
   719     }
   /*
    * Table-entry initialisers for nearest scaling with a solid mask
    * (mask format PIXMAN_solid, mask flags MASK_FLAGS (solid,
    * FAST_PATH_UNIFIED_ALPHA)), one per repeat mode.  Apart from the mask
    * fields they mirror the unmasked SIMPLE_NEAREST_FAST_PATH_* entries and
    * name the same fast_composite_scaled_nearest_<func>_<mode>_<op> pattern.
    */
   721 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
   722     {   PIXMAN_OP_ ## op,						\
   723 	PIXMAN_ ## s,							\
   724 	(SCALED_NEAREST_FLAGS		|				\
   725 	 FAST_PATH_NORMAL_REPEAT	|				\
   726 	 FAST_PATH_X_UNIT_POSITIVE),					\
   727 	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
   728 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   729 	fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,	\
   730     }
   732 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)		\
   733     {   PIXMAN_OP_ ## op,						\
   734 	PIXMAN_ ## s,							\
   735 	(SCALED_NEAREST_FLAGS		|				\
   736 	 FAST_PATH_PAD_REPEAT		|				\
   737 	 FAST_PATH_X_UNIT_POSITIVE),					\
   738 	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
   739 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   740 	fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,	\
   741     }
   743 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)		\
   744     {   PIXMAN_OP_ ## op,						\
   745 	PIXMAN_ ## s,							\
   746 	(SCALED_NEAREST_FLAGS		|				\
   747 	 FAST_PATH_NONE_REPEAT		|				\
   748 	 FAST_PATH_X_UNIT_POSITIVE),					\
   749 	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
   750 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   751 	fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,	\
   752     }
   754 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)		\
   755     {   PIXMAN_OP_ ## op,						\
   756 	PIXMAN_ ## s,							\
   757 	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,	\
   758 	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
   759 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   760 	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
   761     }
   763 /* Prefer the use of 'cover' variant, because it is faster */
   /* Each macro below expands to a comma-separated run of table entries with
    * the COVER entry listed first.  The unmasked list contains COVER, NONE,
    * PAD and NORMAL; the a8-mask and solid-mask lists contain only COVER,
    * NONE and PAD (no NORMAL entry). */
   764 #define SIMPLE_NEAREST_FAST_PATH(op,s,d,func)				\
   765     SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),			\
   766     SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func),			\
   767     SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func),				\
   768     SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
   770 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func)			\
   771     SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func),		\
   772     SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func),		\
   773     SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
   775 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func)		\
   776     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),		\
   777     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),		\
   778     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
   780 /*****************************************************************************/
   782 /*
   783  * Identify 5 zones in each scanline for bilinear scaling. Depending on
   784  * whether 2 pixels to be interpolated are fetched from the image itself,
   785  * from the padding area around it or from both image and padding area.
   786  */
   787 static force_inline void
   788 bilinear_pad_repeat_get_scanline_bounds (int32_t         source_image_width,
   789 					 pixman_fixed_t  vx,
   790 					 pixman_fixed_t  unit_x,
   791 					 int32_t *       left_pad,
   792 					 int32_t *       left_tz,
   793 					 int32_t *       width,
   794 					 int32_t *       right_tz,
   795 					 int32_t *       right_pad)
   796 {
   797 	int width1 = *width, left_pad1, right_pad1;
   798 	int width2 = *width, left_pad2, right_pad2;
   800 	pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
   801 					&width1, &left_pad1, &right_pad1);
   802 	pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
   803 					unit_x, &width2, &left_pad2, &right_pad2);
   805 	*left_pad = left_pad2;
   806 	*left_tz = left_pad1 - left_pad2;
   807 	*right_tz = right_pad2 - right_pad1;
   808 	*right_pad = right_pad1;
   809 	*width -= *left_pad + *left_tz + *right_tz + *right_pad;
   810 }
   812 /*
   813  * Main loop template for single pass bilinear scaling. It needs to be
   814  * provided with 'scanline_func' which should do the compositing operation.
   815  * The needed function has the following prototype:
   816  *
   817  *	scanline_func (dst_type_t *       dst,
 *		       const mask_type_t * mask,
   819  *		       const src_type_t * src_top,
   820  *		       const src_type_t * src_bottom,
   821  *		       int32_t            width,
   822  *		       int                weight_top,
   823  *		       int                weight_bottom,
   824  *		       pixman_fixed_t     vx,
   825  *		       pixman_fixed_t     unit_x,
   826  *		       pixman_fixed_t     max_vx,
   827  *		       pixman_bool_t      zero_src)
   828  *
   829  * Where:
   830  *  dst                 - destination scanline buffer for storing results
   831  *  mask                - mask buffer (or single value for solid mask)
   832  *  src_top, src_bottom - two source scanlines
   833  *  width               - number of pixels to process
   834  *  weight_top          - weight of the top row for interpolation
   835  *  weight_bottom       - weight of the bottom row for interpolation
   836  *  vx                  - initial position for fetching the first pair of
   837  *                        pixels from the source buffer
   838  *  unit_x              - position increment needed to move to the next pair
   839  *                        of pixels
   840  *  max_vx              - image size as a fixed point value, can be used for
   841  *                        implementing NORMAL repeat (when it is supported)
   842  *  zero_src            - boolean hint variable, which is set to TRUE when
   843  *                        all source pixels are fetched from zero padding
   844  *                        zone for NONE repeat
   845  *
 * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to
 *       BILINEAR_INTERPOLATION_RANGE, but sometimes it may be less than that
 *       for NONE repeat when handling fuzzy antialiased top or bottom image
 *       edges. Both the top and the bottom weight are also guaranteed to be
 *       less than BILINEAR_INTERPOLATION_RANGE.
 *       For example, the weights can fit into an unsigned byte or be used
 *       with 8-bit SIMD multiplication instructions for 8-bit interpolation
 *       precision.
   854  */
   856 /* Replace a single "scanline_func" with "fetch_func" & "op_func" to allow optional
   857  * two stage processing (bilinear fetch to a temp buffer, followed by unscaled
   858  * combine), "op_func" may be NULL, in this case we keep old behavior.
   859  * This is ugly and gcc issues some warnings, but works.
   860  *
   861  * An advice: clang has much better error reporting than gcc for deeply nested macros.
   862  */
   864 #define	scanline_func(dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,            \
   865                       scanline_buf, mask, src_top, src_bottom, width,                           \
   866                       weight_top, weight_bottom, vx, unit_x, max_vx, zero_src)                  \
   867  do {                                                                                           \
   868 		if (op_func != NULL)								\
   869 		{										\
   870 		    fetch_func ((void *)scanline_buf, (mask), (src_top), (src_bottom), (width), \
   871                         (weight_top), (weight_bottom), (vx), (unit_x), (max_vx), (zero_src));   \
   872 		    ((void (*)(dst_type_t *, const mask_type_t *, const src_type_t *, int)) op_func)\
   873 			((dst), (mask), (src_type_t *)scanline_buf, (width));			\
   874 		}										\
   875 		else										\
   876 		{										\
   877 		    fetch_func ((void*)(dst), (mask), (src_top), (src_bottom), (width), (weight_top),  \
   878                                 (weight_bottom), (vx), (unit_x), (max_vx), (zero_src));         \
   879 		}                                                                               \
   880   } while (0)
   883 #define SCANLINE_BUFFER_LENGTH 3072
   885 #define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, fetch_func, op_func, src_type_t,		\
   886 				  mask_type_t, dst_type_t, repeat_mode, flags)			\
   887 static void											\
   888 fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,		\
   889 						   pixman_composite_info_t *info)		\
   890 {												\
   891     PIXMAN_COMPOSITE_ARGS (info);								\
   892     dst_type_t *dst_line;									\
   893     mask_type_t *mask_line;									\
   894     src_type_t *src_first_line;									\
   895     int       y1, y2;										\
   896     pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */		\
   897     pixman_vector_t v;										\
   898     pixman_fixed_t vx, vy;									\
   899     pixman_fixed_t unit_x, unit_y;								\
   900     int32_t left_pad, left_tz, right_tz, right_pad;						\
   901 												\
   902     dst_type_t *dst;										\
   903     mask_type_t solid_mask;									\
   904     const mask_type_t *mask = &solid_mask;							\
   905     int src_stride, mask_stride, dst_stride;							\
   906 												\
   907     int src_width;										\
   908     pixman_fixed_t src_width_fixed;								\
   909     int max_x;											\
   910     pixman_bool_t need_src_extension;								\
   911                                                                                                 \
   912     uint64_t stack_scanline_buffer[SCANLINE_BUFFER_LENGTH];                                     \
   913     uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer;                               \
   914 												\
   915     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);	\
   916     if (flags & FLAG_HAVE_SOLID_MASK)								\
   917     {												\
   918 	solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);	\
   919 	mask_stride = 0;									\
   920     }												\
   921     else if (flags & FLAG_HAVE_NON_SOLID_MASK)							\
   922     {												\
   923 	PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,				\
   924 			       mask_stride, mask_line, 1);					\
   925     }												\
   926 												\
   927     /* pass in 0 instead of src_x and src_y because src_x and src_y need to be			\
   928      * transformed from destination space to source space */					\
   929     PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);		\
   930 												\
   931     /* reference point is the center of the pixel */						\
   932     v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;				\
   933     v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;				\
   934     v.vector[2] = pixman_fixed_1;								\
   935 												\
   936     if (!pixman_transform_point_3d (src_image->common.transform, &v))				\
   937 	return;											\
   938 												\
   939     unit_x = src_image->common.transform->matrix[0][0];						\
   940     unit_y = src_image->common.transform->matrix[1][1];						\
   941 												\
   942     v.vector[0] -= pixman_fixed_1 / 2;								\
   943     v.vector[1] -= pixman_fixed_1 / 2;								\
   944 												\
   945     vy = v.vector[1];										\
   946 												\
   947     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||					\
   948 	PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)					\
   949     {												\
   950 	bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,	\
   951 					&left_pad, &left_tz, &width, &right_tz, &right_pad);	\
   952 	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
   953 	{											\
   954 	    /* PAD repeat does not need special handling for 'transition zones' and */		\
   955 	    /* they can be combined with 'padding zones' safely */				\
   956 	    left_pad += left_tz;								\
   957 	    right_pad += right_tz;								\
   958 	    left_tz = right_tz = 0;								\
   959 	}											\
   960 	v.vector[0] += left_pad * unit_x;							\
   961     }												\
   962 												\
   963     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)					\
   964     {												\
   965 	vx = v.vector[0];									\
   966 	repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width));		\
   967 	max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1;			\
   968 												\
   969 	if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH)					\
   970 	{											\
   971 	    src_width = 0;									\
   972 												\
   973 	    while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x)			\
   974 		src_width += src_image->bits.width;						\
   975 												\
   976 	    need_src_extension = TRUE;								\
   977 	}											\
   978 	else											\
   979 	{											\
   980 	    src_width = src_image->bits.width;							\
   981 	    need_src_extension = FALSE;								\
   982 	}											\
   983 												\
   984 	src_width_fixed = pixman_int_to_fixed (src_width);					\
   985     }												\
   986                                                                                                 \
   987     if (op_func != NULL && width * sizeof(src_type_t) > sizeof(stack_scanline_buffer))          \
   988     {                                                                                           \
   989 	scanline_buffer = pixman_malloc_ab (width, sizeof(src_type_t));                         \
   990                                                                                                 \
   991 	if (!scanline_buffer)                                                                   \
   992 	    return;                                                                             \
   993     }                                                                                           \
   994 												\
   995     while (--height >= 0)									\
   996     {												\
   997 	int weight1, weight2;									\
   998 	dst = dst_line;										\
   999 	dst_line += dst_stride;									\
  1000 	vx = v.vector[0];									\
  1001 	if (flags & FLAG_HAVE_NON_SOLID_MASK)							\
  1002 	{											\
  1003 	    mask = mask_line;									\
  1004 	    mask_line += mask_stride;								\
  1005 	}											\
  1007 	y1 = pixman_fixed_to_int (vy);								\
  1008 	weight2 = pixman_fixed_to_bilinear_weight (vy);						\
  1009 	if (weight2)										\
  1010 	{											\
  1011 	    /* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */	\
  1012 	    y2 = y1 + 1;									\
  1013 	    weight1 = BILINEAR_INTERPOLATION_RANGE - weight2;					\
  1014 	}											\
  1015 	else											\
  1016 	{											\
  1017 	    /* set both top and bottom row to the same scanline and tweak weights */		\
  1018 	    y2 = y1;										\
  1019 	    weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2;				\
  1020 	}											\
  1021 	vy += unit_y;										\
  1022 	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
  1023 	{											\
  1024 	    src_type_t *src1, *src2;								\
  1025 	    src_type_t buf1[2];									\
  1026 	    src_type_t buf2[2];									\
  1027 	    repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);				\
  1028 	    repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);				\
  1029 	    src1 = src_first_line + src_stride * y1;						\
  1030 	    src2 = src_first_line + src_stride * y2;						\
  1032 	    if (left_pad > 0)									\
  1033 	    {											\
  1034 		buf1[0] = buf1[1] = src1[0];							\
  1035 		buf2[0] = buf2[1] = src2[0];							\
  1036 		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
  1037 			       scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2,   \
  1038                                0, 0, 0, FALSE);	                                                \
  1039 		dst += left_pad;								\
  1040 		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
  1041 		    mask += left_pad;								\
  1042 	    }											\
  1043 	    if (width > 0)									\
  1044 	    {											\
  1045 		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
  1046 			       scanline_buffer, mask, src1, src2, width, weight1, weight2,      \
  1047                                vx, unit_x, 0, FALSE);                                           \
  1048 		dst += width;									\
  1049 		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
  1050 		    mask += width;								\
  1051 	    }											\
  1052 	    if (right_pad > 0)									\
  1053 	    {											\
  1054 		buf1[0] = buf1[1] = src1[src_image->bits.width - 1];				\
  1055 		buf2[0] = buf2[1] = src2[src_image->bits.width - 1];				\
  1056 		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
  1057 			       scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2,  \
  1058                                0, 0, 0, FALSE);                                                 \
  1059 	    }											\
  1060 	}											\
  1061 	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)				\
  1062 	{											\
  1063 	    src_type_t *src1, *src2;								\
  1064 	    src_type_t buf1[2];									\
  1065 	    src_type_t buf2[2];									\
  1066 	    /* handle top/bottom zero padding by just setting weights to 0 if needed */		\
  1067 	    if (y1 < 0)										\
  1068 	    {											\
  1069 		weight1 = 0;									\
  1070 		y1 = 0;										\
  1071 	    }											\
  1072 	    if (y1 >= src_image->bits.height)							\
  1073 	    {											\
  1074 		weight1 = 0;									\
  1075 		y1 = src_image->bits.height - 1;						\
  1076 	    }											\
  1077 	    if (y2 < 0)										\
  1078 	    {											\
  1079 		weight2 = 0;									\
  1080 		y2 = 0;										\
  1081 	    }											\
  1082 	    if (y2 >= src_image->bits.height)							\
  1083 	    {											\
  1084 		weight2 = 0;									\
  1085 		y2 = src_image->bits.height - 1;						\
  1086 	    }											\
  1087 	    src1 = src_first_line + src_stride * y1;						\
  1088 	    src2 = src_first_line + src_stride * y2;						\
  1090 	    if (left_pad > 0)									\
  1091 	    {											\
  1092 		buf1[0] = buf1[1] = 0;								\
  1093 		buf2[0] = buf2[1] = 0;								\
  1094 		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
  1095 			       scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2,   \
  1096                                0, 0, 0, TRUE);	                                                \
  1097 		dst += left_pad;								\
  1098 		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
  1099 		    mask += left_pad;								\
  1100 	    }											\
  1101 	    if (left_tz > 0)									\
  1102 	    {											\
  1103 		buf1[0] = 0;									\
  1104 		buf1[1] = src1[0];								\
  1105 		buf2[0] = 0;									\
  1106 		buf2[1] = src2[0];								\
  1107 		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
  1108 			       scanline_buffer, mask, buf1, buf2, left_tz, weight1, weight2,	\
  1109 			       pixman_fixed_frac (vx), unit_x, 0, FALSE);			\
  1110 		dst += left_tz;									\
  1111 		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
  1112 		    mask += left_tz;								\
  1113 		vx += left_tz * unit_x;								\
  1114 	    }											\
  1115 	    if (width > 0)									\
  1116 	    {											\
  1117 		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
  1118 			       scanline_buffer, mask, src1, src2, width, weight1, weight2,      \
  1119                                vx, unit_x, 0, FALSE);                                           \
  1120 		dst += width;									\
  1121 		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
  1122 		    mask += width;								\
  1123 		vx += width * unit_x;								\
  1124 	    }											\
  1125 	    if (right_tz > 0)									\
  1126 	    {											\
  1127 		buf1[0] = src1[src_image->bits.width - 1];					\
  1128 		buf1[1] = 0;									\
  1129 		buf2[0] = src2[src_image->bits.width - 1];					\
  1130 		buf2[1] = 0;									\
  1131 		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
  1132 			       scanline_buffer, mask, buf1, buf2, right_tz, weight1, weight2,   \
  1133 			       pixman_fixed_frac (vx), unit_x, 0, FALSE);			\
  1134 		dst += right_tz;								\
  1135 		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
  1136 		    mask += right_tz;								\
  1137 	    }											\
  1138 	    if (right_pad > 0)									\
  1139 	    {											\
  1140 		buf1[0] = buf1[1] = 0;								\
  1141 		buf2[0] = buf2[1] = 0;								\
  1142 		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
  1143 			       scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2,  \
  1144                                0, 0, 0, TRUE);	                                                \
  1145 	    }											\
  1146 	}											\
  1147 	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
  1148 	{											\
  1149 	    int32_t	    num_pixels;								\
  1150 	    int32_t	    width_remain;							\
  1151 	    src_type_t *    src_line_top;							\
  1152 	    src_type_t *    src_line_bottom;							\
  1153 	    src_type_t	    buf1[2];								\
  1154 	    src_type_t	    buf2[2];								\
  1155 	    src_type_t	    extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2];			\
  1156 	    src_type_t	    extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2];			\
  1157 	    int		    i, j;								\
  1159 	    repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);				\
  1160 	    repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);				\
  1161 	    src_line_top = src_first_line + src_stride * y1;					\
  1162 	    src_line_bottom = src_first_line + src_stride * y2;					\
  1164 	    if (need_src_extension)								\
  1165 	    {											\
  1166 		for (i=0; i<src_width;)								\
  1167 		{										\
  1168 		    for (j=0; j<src_image->bits.width; j++, i++)				\
  1169 		    {										\
  1170 			extended_src_line0[i] = src_line_top[j];				\
  1171 			extended_src_line1[i] = src_line_bottom[j];				\
  1172 		    }										\
  1173 		}										\
  1175 		src_line_top = &extended_src_line0[0];						\
  1176 		src_line_bottom = &extended_src_line1[0];					\
  1177 	    }											\
  1179 	    /* Top & Bottom wrap around buffer */						\
  1180 	    buf1[0] = src_line_top[src_width - 1];						\
  1181 	    buf1[1] = src_line_top[0];								\
  1182 	    buf2[0] = src_line_bottom[src_width - 1];						\
  1183 	    buf2[1] = src_line_bottom[0];							\
  1185 	    width_remain = width;								\
  1187 	    while (width_remain > 0)								\
  1188 	    {											\
  1189 		/* We use src_width_fixed because it can make vx in original source range */	\
  1190 		repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);				\
  1192 		/* Wrap around part */								\
  1193 		if (pixman_fixed_to_int (vx) == src_width - 1)					\
  1194 		{										\
  1195 		    /* for positive unit_x							\
  1196 		     * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed		\
  1197 		     *										\
  1198 		     * vx is in range [0, src_width_fixed - pixman_fixed_e]			\
  1199 		     * So we are safe from overflow.						\
  1200 		     */										\
  1201 		    num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1;	\
  1203 		    if (num_pixels > width_remain)						\
  1204 			num_pixels = width_remain;						\
  1206 		    scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func,    \
  1207                                    dst, scanline_buffer, mask, buf1, buf2, num_pixels,          \
  1208 				   weight1, weight2, pixman_fixed_frac(vx),			\
  1209 				   unit_x, src_width_fixed, FALSE);				\
  1211 		    width_remain -= num_pixels;							\
  1212 		    vx += num_pixels * unit_x;							\
  1213 		    dst += num_pixels;								\
  1215 		    if (flags & FLAG_HAVE_NON_SOLID_MASK)					\
  1216 			mask += num_pixels;							\
  1218 		    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);			\
  1219 		}										\
  1221 		/* Normal scanline composite */							\
  1222 		if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0)		\
  1223 		{										\
  1224 		    /* for positive unit_x							\
  1225 		     * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1)	\
  1226 		     *										\
  1227 		     * vx is in range [0, src_width_fixed - pixman_fixed_e]			\
  1228 		     * So we are safe from overflow here.					\
  1229 		     */										\
  1230 		    num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e)	\
  1231 				  / unit_x) + 1;						\
  1233 		    if (num_pixels > width_remain)						\
  1234 			num_pixels = width_remain;						\
  1236 		    scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func,    \
  1237                                    dst, scanline_buffer, mask, src_line_top, src_line_bottom,   \
  1238                                    num_pixels, weight1, weight2, vx, unit_x, src_width_fixed,   \
  1239                                    FALSE);	                                                \
  1241 		    width_remain -= num_pixels;							\
  1242 		    vx += num_pixels * unit_x;							\
  1243 		    dst += num_pixels;								\
  1245 		    if (flags & FLAG_HAVE_NON_SOLID_MASK)					\
  1246 		        mask += num_pixels;							\
  1247 		}										\
  1248 	    }											\
  1249 	}											\
  1250 	else											\
  1251 	{											\
  1252 	    scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,       \
  1253                            scanline_buffer, mask,                                               \
  1254                            src_first_line + src_stride * y1,					\
  1255 			   src_first_line + src_stride * y2, width,				\
  1256 			   weight1, weight2, vx, unit_x, max_vx, FALSE);			\
  1257 	}											\
  1258     }												\
  1259     if (scanline_buffer != (uint8_t *) stack_scanline_buffer)                                   \
  1260 	free (scanline_buffer);                                                                 \
  1263 /* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
  1264 #define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, fetch_func, op_func, src_type_t, mask_type_t,\
  1265 				  dst_type_t, repeat_mode, flags)				\
  1266 	FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, fetch_func, op_func, src_type_t, mask_type_t,\
  1267 				  dst_type_t, repeat_mode, flags)
  1269 #define SCALED_BILINEAR_FLAGS						\
  1270     (FAST_PATH_SCALE_TRANSFORM	|					\
  1271      FAST_PATH_NO_ALPHA_MAP	|					\
  1272      FAST_PATH_BILINEAR_FILTER	|					\
  1273      FAST_PATH_NO_ACCESSORS	|					\
  1274      FAST_PATH_NARROW_FORMAT)
  1276 #define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func)			\
  1277     {   PIXMAN_OP_ ## op,						\
  1278 	PIXMAN_ ## s,							\
  1279 	(SCALED_BILINEAR_FLAGS		|				\
  1280 	 FAST_PATH_PAD_REPEAT		|				\
  1281 	 FAST_PATH_X_UNIT_POSITIVE),					\
  1282 	PIXMAN_null, 0,							\
  1283 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1284 	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
  1287 #define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func)			\
  1288     {   PIXMAN_OP_ ## op,						\
  1289 	PIXMAN_ ## s,							\
  1290 	(SCALED_BILINEAR_FLAGS		|				\
  1291 	 FAST_PATH_NONE_REPEAT		|				\
  1292 	 FAST_PATH_X_UNIT_POSITIVE),					\
  1293 	PIXMAN_null, 0,							\
  1294 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1295 	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
  1298 #define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func)			\
  1299     {   PIXMAN_OP_ ## op,						\
  1300 	PIXMAN_ ## s,							\
  1301 	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,	\
  1302 	PIXMAN_null, 0,							\
  1303 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1304 	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
  1307 #define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func)			\
  1308     {   PIXMAN_OP_ ## op,						\
  1309 	PIXMAN_ ## s,							\
  1310 	(SCALED_BILINEAR_FLAGS		|				\
  1311 	 FAST_PATH_NORMAL_REPEAT	|				\
  1312 	 FAST_PATH_X_UNIT_POSITIVE),					\
  1313 	PIXMAN_null, 0,							\
  1314 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1315 	fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,	\
  1318 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func)		\
  1319     {   PIXMAN_OP_ ## op,						\
  1320 	PIXMAN_ ## s,							\
  1321 	(SCALED_BILINEAR_FLAGS		|				\
  1322 	 FAST_PATH_PAD_REPEAT		|				\
  1323 	 FAST_PATH_X_UNIT_POSITIVE),					\
  1324 	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
  1325 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1326 	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
  1329 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func)		\
  1330     {   PIXMAN_OP_ ## op,						\
  1331 	PIXMAN_ ## s,							\
  1332 	(SCALED_BILINEAR_FLAGS		|				\
  1333 	 FAST_PATH_NONE_REPEAT		|				\
  1334 	 FAST_PATH_X_UNIT_POSITIVE),					\
  1335 	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
  1336 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1337 	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
  1340 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func)		\
  1341     {   PIXMAN_OP_ ## op,						\
  1342 	PIXMAN_ ## s,							\
  1343 	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,	\
  1344 	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
  1345 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1346 	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
  1349 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
  1350     {   PIXMAN_OP_ ## op,						\
  1351 	PIXMAN_ ## s,							\
  1352 	(SCALED_BILINEAR_FLAGS		|				\
  1353 	 FAST_PATH_NORMAL_REPEAT	|				\
  1354 	 FAST_PATH_X_UNIT_POSITIVE),					\
  1355 	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
  1356 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1357 	fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,	\
  1360 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)		\
  1361     {   PIXMAN_OP_ ## op,						\
  1362 	PIXMAN_ ## s,							\
  1363 	(SCALED_BILINEAR_FLAGS		|				\
  1364 	 FAST_PATH_PAD_REPEAT		|				\
  1365 	 FAST_PATH_X_UNIT_POSITIVE),					\
  1366 	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
  1367 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1368 	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
  1371 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)		\
  1372     {   PIXMAN_OP_ ## op,						\
  1373 	PIXMAN_ ## s,							\
  1374 	(SCALED_BILINEAR_FLAGS		|				\
  1375 	 FAST_PATH_NONE_REPEAT		|				\
  1376 	 FAST_PATH_X_UNIT_POSITIVE),					\
  1377 	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
  1378 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1379 	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
  1382 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)		\
  1383     {   PIXMAN_OP_ ## op,						\
  1384 	PIXMAN_ ## s,							\
  1385 	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,	\
  1386 	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
  1387 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1388 	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
  1391 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)	\
  1392     {   PIXMAN_OP_ ## op,						\
  1393 	PIXMAN_ ## s,							\
  1394 	(SCALED_BILINEAR_FLAGS		|				\
  1395 	 FAST_PATH_NORMAL_REPEAT	|				\
  1396 	 FAST_PATH_X_UNIT_POSITIVE),					\
  1397 	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
  1398 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1399 	fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,	\
  1402 /* Prefer the use of 'cover' variant, because it is faster */
  1403 #define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func)				\
  1404     SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func),			\
  1405     SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func),			\
  1406     SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func),			\
  1407     SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func)
  1409 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func)			\
  1410     SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func),		\
  1411     SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func),		\
  1412     SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func),		\
  1413     SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
  1415 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func)		\
  1416     SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),		\
  1417     SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),		\
  1418     SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func),		\
  1419     SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
  1421 #endif

mercurial