gfx/cairo/libpixman/src/pixman-inlines.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/cairo/libpixman/src/pixman-inlines.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1421 @@
     1.4 +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
     1.5 +/*
     1.6 + * Copyright © 2000 SuSE, Inc.
     1.7 + * Copyright © 2007 Red Hat, Inc.
     1.8 + *
     1.9 + * Permission to use, copy, modify, distribute, and sell this software and its
    1.10 + * documentation for any purpose is hereby granted without fee, provided that
    1.11 + * the above copyright notice appear in all copies and that both that
    1.12 + * copyright notice and this permission notice appear in supporting
    1.13 + * documentation, and that the name of SuSE not be used in advertising or
    1.14 + * publicity pertaining to distribution of the software without specific,
    1.15 + * written prior permission.  SuSE makes no representations about the
    1.16 + * suitability of this software for any purpose.  It is provided "as is"
    1.17 + * without express or implied warranty.
    1.18 + *
    1.19 + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
    1.20 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
    1.21 + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    1.22 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
    1.23 + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
    1.24 + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
    1.25 + *
    1.26 + * Author:  Keith Packard, SuSE, Inc.
    1.27 + */
    1.28 +
    1.29 +#ifndef PIXMAN_FAST_PATH_H__
    1.30 +#define PIXMAN_FAST_PATH_H__
    1.31 +
    1.32 +#include "pixman-private.h"
    1.33 +
/* Pseudo repeat mode used only by the fast path templates in this file.
 * It selects the variant without any repeat handling, which is valid only
 * when the source samples that are needed are completely covered by the
 * source image. */
#define PIXMAN_REPEAT_COVER -1
    1.35 +
/* Flags describing the input of a fast path macro template.
 * A set flag means either "property X is available, so the template may
 * rely on it" or "property X is present and the template must handle it".
 *
 * FLAG_HAVE_SOLID_MASK
 *  The input mask is solid; the template must handle it.
 *
 * FLAG_HAVE_NON_SOLID_MASK
 *  The input mask is a bits mask; the template must handle it.
 *
 * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually
 * exclusive: setting both at once is not allowed.
 */
#define FLAG_NONE				(0)
#define FLAG_HAVE_SOLID_MASK			(1 <<   1)
#define FLAG_HAVE_NON_SOLID_MASK		(1 <<   2)
    1.53 +
/* Source scanlines narrower than this constant are extended, so that the
 * scanline function is not called over and over on very short runs.
 */
#define REPEAT_NORMAL_MIN_WIDTH			64
    1.58 +
    1.59 +static force_inline pixman_bool_t
    1.60 +repeat (pixman_repeat_t repeat, int *c, int size)
    1.61 +{
    1.62 +    if (repeat == PIXMAN_REPEAT_NONE)
    1.63 +    {
    1.64 +	if (*c < 0 || *c >= size)
    1.65 +	    return FALSE;
    1.66 +    }
    1.67 +    else if (repeat == PIXMAN_REPEAT_NORMAL)
    1.68 +    {
    1.69 +	while (*c >= size)
    1.70 +	    *c -= size;
    1.71 +	while (*c < 0)
    1.72 +	    *c += size;
    1.73 +    }
    1.74 +    else if (repeat == PIXMAN_REPEAT_PAD)
    1.75 +    {
    1.76 +	*c = CLIP (*c, 0, size - 1);
    1.77 +    }
    1.78 +    else /* REFLECT */
    1.79 +    {
    1.80 +	*c = MOD (*c, size * 2);
    1.81 +	if (*c >= size)
    1.82 +	    *c = size * 2 - *c - 1;
    1.83 +    }
    1.84 +    return TRUE;
    1.85 +}
    1.86 +
    1.87 +static force_inline int
    1.88 +pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
    1.89 +{
    1.90 +    return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
    1.91 +	   ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
    1.92 +}
    1.93 +
    1.94 +#if BILINEAR_INTERPOLATION_BITS <= 4
    1.95 +/* Inspired by Filter_32_opaque from Skia */
    1.96 +static force_inline uint32_t
    1.97 +bilinear_interpolation (uint32_t tl, uint32_t tr,
    1.98 +			uint32_t bl, uint32_t br,
    1.99 +			int distx, int disty)
   1.100 +{
   1.101 +    int distxy, distxiy, distixy, distixiy;
   1.102 +    uint32_t lo, hi;
   1.103 +
   1.104 +    distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
   1.105 +    disty <<= (4 - BILINEAR_INTERPOLATION_BITS);
   1.106 +
   1.107 +    distxy = distx * disty;
   1.108 +    distxiy = (distx << 4) - distxy;	/* distx * (16 - disty) */
   1.109 +    distixy = (disty << 4) - distxy;	/* disty * (16 - distx) */
   1.110 +    distixiy =
   1.111 +	16 * 16 - (disty << 4) -
   1.112 +	(distx << 4) + distxy; /* (16 - distx) * (16 - disty) */
   1.113 +
   1.114 +    lo = (tl & 0xff00ff) * distixiy;
   1.115 +    hi = ((tl >> 8) & 0xff00ff) * distixiy;
   1.116 +
   1.117 +    lo += (tr & 0xff00ff) * distxiy;
   1.118 +    hi += ((tr >> 8) & 0xff00ff) * distxiy;
   1.119 +
   1.120 +    lo += (bl & 0xff00ff) * distixy;
   1.121 +    hi += ((bl >> 8) & 0xff00ff) * distixy;
   1.122 +
   1.123 +    lo += (br & 0xff00ff) * distxy;
   1.124 +    hi += ((br >> 8) & 0xff00ff) * distxy;
   1.125 +
   1.126 +    return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff);
   1.127 +}
   1.128 +
   1.129 +#else
   1.130 +#if SIZEOF_LONG > 4
   1.131 +
   1.132 +static force_inline uint32_t
   1.133 +bilinear_interpolation (uint32_t tl, uint32_t tr,
   1.134 +			uint32_t bl, uint32_t br,
   1.135 +			int distx, int disty)
   1.136 +{
   1.137 +    uint64_t distxy, distxiy, distixy, distixiy;
   1.138 +    uint64_t tl64, tr64, bl64, br64;
   1.139 +    uint64_t f, r;
   1.140 +
   1.141 +    distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
   1.142 +    disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
   1.143 +
   1.144 +    distxy = distx * disty;
   1.145 +    distxiy = distx * (256 - disty);
   1.146 +    distixy = (256 - distx) * disty;
   1.147 +    distixiy = (256 - distx) * (256 - disty);
   1.148 +
   1.149 +    /* Alpha and Blue */
   1.150 +    tl64 = tl & 0xff0000ff;
   1.151 +    tr64 = tr & 0xff0000ff;
   1.152 +    bl64 = bl & 0xff0000ff;
   1.153 +    br64 = br & 0xff0000ff;
   1.154 +
   1.155 +    f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
   1.156 +    r = f & 0x0000ff0000ff0000ull;
   1.157 +
   1.158 +    /* Red and Green */
   1.159 +    tl64 = tl;
   1.160 +    tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
   1.161 +
   1.162 +    tr64 = tr;
   1.163 +    tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
   1.164 +
   1.165 +    bl64 = bl;
   1.166 +    bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
   1.167 +
   1.168 +    br64 = br;
   1.169 +    br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
   1.170 +
   1.171 +    f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
   1.172 +    r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
   1.173 +
   1.174 +    return (uint32_t)(r >> 16);
   1.175 +}
   1.176 +
   1.177 +#else
   1.178 +
   1.179 +#ifdef LOW_QUALITY_INTERPOLATION
   1.180 +/* Based on Filter_32_opaque_portable from Skia */
   1.181 +static force_inline uint32_t
   1.182 +bilinear_interpolation(uint32_t a00, uint32_t a01,
   1.183 +		       uint32_t a10, uint32_t a11,
   1.184 +		       int x, int y)
   1.185 +{
   1.186 +    int xy = x * y;
   1.187 +    static const uint32_t mask = 0xff00ff;
   1.188 +
   1.189 +    int scale = 256 - 16*y - 16*x + xy;
   1.190 +    uint32_t lo = (a00 & mask) * scale;
   1.191 +    uint32_t hi = ((a00 >> 8) & mask) * scale;
   1.192 +
   1.193 +    scale = 16*x - xy;
   1.194 +    lo += (a01 & mask) * scale;
   1.195 +    hi += ((a01 >> 8) & mask) * scale;
   1.196 +
   1.197 +    scale = 16*y - xy;
   1.198 +    lo += (a10 & mask) * scale;
   1.199 +    hi += ((a10 >> 8) & mask) * scale;
   1.200 +
   1.201 +    lo += (a11 & mask) * xy;
   1.202 +    hi += ((a11 >> 8) & mask) * xy;
   1.203 +
   1.204 +    return ((lo >> 8) & mask) | (hi & ~mask);
   1.205 +}
   1.206 +#else
   1.207 +static force_inline uint32_t
   1.208 +bilinear_interpolation (uint32_t tl, uint32_t tr,
   1.209 +			uint32_t bl, uint32_t br,
   1.210 +			int distx, int disty)
   1.211 +{
   1.212 +    int distxy, distxiy, distixy, distixiy;
   1.213 +    uint32_t f, r;
   1.214 +
   1.215 +    distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
   1.216 +    disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
   1.217 +
   1.218 +    distxy = distx * disty;
   1.219 +    distxiy = (distx << 8) - distxy;	/* distx * (256 - disty) */
   1.220 +    distixy = (disty << 8) - distxy;	/* disty * (256 - distx) */
   1.221 +    distixiy =
   1.222 +	256 * 256 - (disty << 8) -
   1.223 +	(distx << 8) + distxy;		/* (256 - distx) * (256 - disty) */
   1.224 +
   1.225 +    /* Blue */
   1.226 +    r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
   1.227 +      + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
   1.228 +
   1.229 +    /* Green */
   1.230 +    f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
   1.231 +      + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
   1.232 +    r |= f & 0xff000000;
   1.233 +
   1.234 +    tl >>= 16;
   1.235 +    tr >>= 16;
   1.236 +    bl >>= 16;
   1.237 +    br >>= 16;
   1.238 +    r >>= 16;
   1.239 +
   1.240 +    /* Red */
   1.241 +    f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
   1.242 +      + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
   1.243 +    r |= f & 0x00ff0000;
   1.244 +
   1.245 +    /* Alpha */
   1.246 +    f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
   1.247 +      + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
   1.248 +    r |= f & 0xff000000;
   1.249 +
   1.250 +    return r;
   1.251 +}
   1.252 +#endif
   1.253 +#endif
   1.254 +#endif // BILINEAR_INTERPOLATION_BITS <= 4
   1.255 +
   1.256 +/*
   1.257 + * For each scanline fetched from source image with PAD repeat:
   1.258 + * - calculate how many pixels need to be padded on the left side
   1.259 + * - calculate how many pixels need to be padded on the right side
   1.260 + * - update width to only count pixels which are fetched from the image
   1.261 + * All this information is returned via 'width', 'left_pad', 'right_pad'
   1.262 + * arguments. The code is assuming that 'unit_x' is positive.
   1.263 + *
   1.264 + * Note: 64-bit math is used in order to avoid potential overflows, which
   1.265 + *       is probably excessive in many cases. This particular function
   1.266 + *       may need its own correctness test and performance tuning.
   1.267 + */
   1.268 +static force_inline void
   1.269 +pad_repeat_get_scanline_bounds (int32_t         source_image_width,
   1.270 +				pixman_fixed_t  vx,
   1.271 +				pixman_fixed_t  unit_x,
   1.272 +				int32_t *       width,
   1.273 +				int32_t *       left_pad,
   1.274 +				int32_t *       right_pad)
   1.275 +{
   1.276 +    int64_t max_vx = (int64_t) source_image_width << 16;
   1.277 +    int64_t tmp;
   1.278 +    if (vx < 0)
   1.279 +    {
   1.280 +	tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
   1.281 +	if (tmp > *width)
   1.282 +	{
   1.283 +	    *left_pad = *width;
   1.284 +	    *width = 0;
   1.285 +	}
   1.286 +	else
   1.287 +	{
   1.288 +	    *left_pad = (int32_t) tmp;
   1.289 +	    *width -= (int32_t) tmp;
   1.290 +	}
   1.291 +    }
   1.292 +    else
   1.293 +    {
   1.294 +	*left_pad = 0;
   1.295 +    }
   1.296 +    tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
   1.297 +    if (tmp < 0)
   1.298 +    {
   1.299 +	*right_pad = *width;
   1.300 +	*width = 0;
   1.301 +    }
   1.302 +    else if (tmp >= *width)
   1.303 +    {
   1.304 +	*right_pad = 0;
   1.305 +    }
   1.306 +    else
   1.307 +    {
   1.308 +	*right_pad = *width - (int32_t) tmp;
   1.309 +	*width = (int32_t) tmp;
   1.310 +    }
   1.311 +}
   1.312 +
   1.313 +/* A macroified version of specialized nearest scalers for some
   1.314 + * common 8888 and 565 formats. It supports SRC and OVER ops.
   1.315 + *
   1.316 + * There are two repeat versions, one that handles repeat normal,
   1.317 + * and one without repeat handling that only works if the src region
   1.318 + * used is completely covered by the pre-repeated source samples.
   1.319 + *
   1.320 + * The loops are unrolled to process two pixels per iteration for better
   1.321 + * performance on most CPU architectures (superscalar processors
   1.322 + * can issue several operations simultaneously, other processors can hide
   1.323 + * instruction latencies by pipelining operations). Unrolling more
   1.324 + * does not make much sense because the compiler will start running out
   1.325 + * of spare registers soon.
   1.326 + */
   1.327 +
/* Alpha accessors, selected by pasting the source format name into
 * GET_ ## SRC_FORMAT ## _ALPHA inside FAST_NEAREST_SCANLINE. */

/* Top byte of a 32-bit pixel. */
#define GET_8888_ALPHA(s) ((s) >> 24)
 /* This is not actually used since we don't have an OVER with
    565 source, but it is needed to build. */
#define GET_0565_ALPHA(s) 0xff
/* Always reports full alpha, whatever the pixel value. */
#define GET_x888_ALPHA(s) 0xff
   1.333 +
/*
 * FAST_NEAREST_SCANLINE - generate a nearest-neighbour scanline scaler.
 *
 * Expands to a static force_inline function 'scanline_func_name' that
 * writes 'w' destination pixels.  For every destination pixel the source
 * pixel at index pixman_fixed_to_int (vx) is read through 'src', and 'vx'
 * is then advanced by 'unit_x'.
 *
 *   SRC_FORMAT, DST_FORMAT  format names pasted into the convert_*_to_*
 *                           and GET_*_ALPHA helper names
 *   src_type_t, dst_type_t  C types of one source / destination pixel
 *   OP                      SRC or OVER; any other operator reaches abort ()
 *   repeat_mode             pasted onto PIXMAN_REPEAT_; only NORMAL changes
 *                           the generated code (wrapping of 'vx')
 *
 * For NORMAL repeat 'vx' is wrapped by subtracting 'src_width_fixed' for
 * as long as it is non-negative.  The main loop template in this file
 * matches that by passing 'src' advanced by the source width and 'vx'
 * reduced by 'src_width_fixed'.
 *
 * With OP == OVER the function returns immediately when
 * 'fully_transparent_src' is set.  Otherwise source pixels with alpha 0xff
 * are converted and stored directly, source pixels equal to zero leave
 * the destination untouched, and all remaining pixels are blended with
 * UN8x4_MUL_UN8_ADD_UN8x4.  With OP == SRC every pixel is converted and
 * stored.
 */
#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT,			\
			      src_type_t, dst_type_t, OP, repeat_mode)				\
static force_inline void									\
scanline_func_name (dst_type_t       *dst,							\
		    const src_type_t *src,							\
		    int32_t           w,							\
		    pixman_fixed_t    vx,							\
		    pixman_fixed_t    unit_x,							\
		    pixman_fixed_t    src_width_fixed,						\
		    pixman_bool_t     fully_transparent_src)					\
{												\
	uint32_t   d;										\
	src_type_t s1, s2;									\
	uint8_t    a1, a2;									\
	int        x1, x2;									\
												\
	/* OVER with a fully transparent source leaves the destination as is */			\
	if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src)			\
	    return;										\
												\
	/* Only SRC and OVER are implemented by this template */				\
	if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER)		\
	    abort();										\
												\
	/* Main loop: two pixels per iteration */						\
	while ((w -= 2) >= 0)									\
	{											\
	    x1 = pixman_fixed_to_int (vx);							\
	    vx += unit_x;									\
	    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
	    {											\
		/* This works because we know that unit_x is positive */			\
		while (vx >= 0)									\
		    vx -= src_width_fixed;							\
	    }											\
	    s1 = *(src + x1);									\
												\
	    x2 = pixman_fixed_to_int (vx);							\
	    vx += unit_x;									\
	    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
	    {											\
		/* This works because we know that unit_x is positive */			\
		while (vx >= 0)									\
		    vx -= src_width_fixed;							\
	    }											\
	    s2 = *(src + x2);									\
												\
	    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)						\
	    {											\
		a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);						\
		a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2);						\
												\
		if (a1 == 0xff)									\
		{										\
		    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
		}										\
		else if (s1)									\
		{										\
		    d = convert_ ## DST_FORMAT ## _to_8888 (*dst);				\
		    s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1);				\
		    /* a1 becomes 255 - alpha, the factor applied to d */			\
		    a1 ^= 0xff;									\
		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);					\
		    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
		}										\
		dst++;										\
												\
		if (a2 == 0xff)									\
		{										\
		    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2);			\
		}										\
		else if (s2)									\
		{										\
		    d = convert_## DST_FORMAT ## _to_8888 (*dst);				\
		    s2 = convert_## SRC_FORMAT ## _to_8888 (s2);				\
		    a2 ^= 0xff;									\
		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2);					\
		    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
		}										\
		dst++;										\
	    }											\
	    else /* PIXMAN_OP_SRC */								\
	    {											\
		*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
		*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2);			\
	    }											\
	}											\
												\
	/* w is now -1 or -2; its low bit is set when one pixel is left over */		\
	if (w & 1)										\
	{											\
	    x1 = pixman_fixed_to_int (vx);							\
	    s1 = *(src + x1);									\
												\
	    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)						\
	    {											\
		a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);						\
												\
		if (a1 == 0xff)									\
		{										\
		    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
		}										\
		else if (s1)									\
		{										\
		    d = convert_## DST_FORMAT ## _to_8888 (*dst);				\
		    s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1);				\
		    a1 ^= 0xff;									\
		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);					\
		    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
		}										\
		dst++;										\
	    }											\
	    else /* PIXMAN_OP_SRC */								\
	    {											\
		*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
	    }											\
	}											\
}
   1.447 +
/*
 * FAST_NEAREST_MAINLOOP_INT - generate the composite function that runs a
 * nearest scanline function over every row of the destination.
 *
 * Expands to 'fast_composite_scaled_nearest<scale_func_name>'; the name
 * argument is pasted on directly, so it has to bring its own leading
 * underscore.
 *
 *   scanline_func   called as
 *                   (mask, dst, src, w, vx, unit_x, src_width_fixed,
 *                    fully_transparent_src)
 *   src_type_t, mask_type_t, dst_type_t
 *                   C types of one source / mask / destination pixel
 *   repeat_mode     pasted onto PIXMAN_REPEAT_ (NORMAL, PAD, NONE; anything
 *                   else takes the plain path without repeat handling)
 *   have_mask       non-zero when a mask image is used
 *   mask_is_solid   non-zero when that mask is solid; its value is then
 *                   fetched once with _pixman_image_get_solid
 *
 * The generated function transforms the centre of the first pixel with
 * the source image transform, takes the x and y steps from the diagonal
 * of the transform matrix, and then walks the destination row by row.
 * For PAD and NONE repeat the row is split once, up front, into a left
 * pad, a middle part and a right pad with pad_repeat_get_scanline_bounds.
 *
 * The scanline function always receives 'src' advanced by the source
 * width together with 'vx' reduced by 'src_width_fixed'.
 *
 * Without a mask, 'mask' points at the uninitialised 'solid_mask'; the
 * wrapper generated by FAST_NEAREST_MAINLOOP_NOMASK never reads it.
 */
#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
				  dst_type_t, repeat_mode, have_mask, mask_is_solid)		\
static void											\
fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,		\
						   pixman_composite_info_t *info)               \
{												\
    PIXMAN_COMPOSITE_ARGS (info);					                        \
    dst_type_t *dst_line;						                        \
    mask_type_t *mask_line;									\
    src_type_t *src_first_line;									\
    int       y;										\
    pixman_fixed_t src_width_fixed = pixman_int_to_fixed (src_image->bits.width);		\
    pixman_fixed_t max_vy;									\
    pixman_vector_t v;										\
    pixman_fixed_t vx, vy;									\
    pixman_fixed_t unit_x, unit_y;								\
    int32_t left_pad, right_pad;								\
												\
    src_type_t *src;										\
    dst_type_t *dst;										\
    mask_type_t solid_mask;									\
    const mask_type_t *mask = &solid_mask;							\
    int src_stride, mask_stride, dst_stride;							\
												\
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);	\
    if (have_mask)										\
    {												\
	if (mask_is_solid)									\
	    solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);	\
	else											\
	    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,			\
				   mask_stride, mask_line, 1);					\
    }												\
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be			\
     * transformed from destination space to source space */					\
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);		\
												\
    /* reference point is the center of the pixel */						\
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;				\
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;				\
    v.vector[2] = pixman_fixed_1;								\
												\
    if (!pixman_transform_point_3d (src_image->common.transform, &v))				\
	return;											\
												\
    /* Only the diagonal of the matrix is used as the per-pixel step */			\
    unit_x = src_image->common.transform->matrix[0][0];						\
    unit_y = src_image->common.transform->matrix[1][1];						\
												\
    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */			\
    v.vector[0] -= pixman_fixed_e;								\
    v.vector[1] -= pixman_fixed_e;								\
												\
    vx = v.vector[0];										\
    vy = v.vector[1];										\
												\
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)					\
    {												\
	max_vy = pixman_int_to_fixed (src_image->bits.height);					\
												\
	/* Clamp repeating positions inside the actual samples */				\
	repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);					\
	repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);						\
    }												\
												\
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||					\
	PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)					\
    {												\
	pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x,			\
					&width, &left_pad, &right_pad);				\
	/* Skip vx ahead to the first pixel of the middle part */				\
	vx += left_pad * unit_x;								\
    }												\
												\
    while (--height >= 0)									\
    {												\
	dst = dst_line;										\
	dst_line += dst_stride;									\
	if (have_mask && !mask_is_solid)							\
	{											\
	    mask = mask_line;									\
	    mask_line += mask_stride;								\
	}											\
												\
	y = pixman_fixed_to_int (vy);								\
	vy += unit_y;										\
	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
	    repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);						\
	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
	{											\
	    repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height);				\
	    src = src_first_line + src_stride * y;						\
	    if (left_pad > 0)									\
	    {											\
		/* Pointer is src + 1 with a zero step; presumably			\
		 * pixman_fixed_to_int (-pixman_fixed_e) is -1, so every pad			\
		 * pixel samples the first pixel of the row - TODO confirm */			\
		scanline_func (mask, dst,							\
			       src + src_image->bits.width - src_image->bits.width + 1,		\
			       left_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE);		\
	    }											\
	    if (width > 0)									\
	    {											\
		scanline_func (mask + (mask_is_solid ? 0 : left_pad),				\
			       dst + left_pad, src + src_image->bits.width, width,		\
			       vx - src_width_fixed, unit_x, src_width_fixed, FALSE);		\
	    }											\
	    if (right_pad > 0)									\
	    {											\
		/* Same trick from one past the end of the row: presumably			\
		 * samples the last pixel of the row - TODO confirm */				\
		scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),			\
			       dst + left_pad + width, src + src_image->bits.width,		\
			       right_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE);		\
	    }											\
	}											\
	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)				\
	{											\
	    /* Single zero pixel used as source outside the image */				\
	    static const src_type_t zero[1] = { 0 };						\
	    if (y < 0 || y >= src_image->bits.height)						\
	    {											\
		/* Whole row is outside the image */						\
		scanline_func (mask, dst, zero + 1, left_pad + width + right_pad,		\
			       -pixman_fixed_e, 0, src_width_fixed, TRUE);			\
		continue;									\
	    }											\
	    src = src_first_line + src_stride * y;						\
	    if (left_pad > 0)									\
	    {											\
		scanline_func (mask, dst, zero + 1, left_pad,					\
			       -pixman_fixed_e, 0, src_width_fixed, TRUE);			\
	    }											\
	    if (width > 0)									\
	    {											\
		scanline_func (mask + (mask_is_solid ? 0 : left_pad),				\
			       dst + left_pad, src + src_image->bits.width, width,		\
			       vx - src_width_fixed, unit_x, src_width_fixed, FALSE);		\
	    }											\
	    if (right_pad > 0)									\
	    {											\
		scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),			\
			       dst + left_pad + width, zero + 1, right_pad,			\
			       -pixman_fixed_e, 0, src_width_fixed, TRUE);			\
	    }											\
	}											\
	else											\
	{											\
	    /* NORMAL repeat, or no repeat handling at all: one call per row */		\
	    src = src_first_line + src_stride * y;						\
	    scanline_func (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed,	\
			   unit_x, src_width_fixed, FALSE);					\
	}											\
    }												\
}
   1.593 +
/* Same as FAST_NEAREST_MAINLOOP_INT, except that an underscore is pasted
 * in front of scale_func_name, so the generated function is named
 * fast_composite_scaled_nearest_<scale_func_name>.
 *
 * The extra macro level is a workaround for old sun studio, see:
 * https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
				  dst_type_t, repeat_mode, have_mask, mask_is_solid)		\
	FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,	\
				  dst_type_t, repeat_mode, have_mask, mask_is_solid)
   1.599 +
/* Main loop for a scanline function that takes no mask argument.
 *
 * Defines the adapter '<scanline_func><scale_func_name>_wrapper', which
 * accepts a mask pointer, ignores it and forwards every other argument to
 * scanline_func.  FAST_NEAREST_MAINLOOP_INT is then instantiated with that
 * adapter, uint8_t as the mask type, and both have_mask and mask_is_solid
 * set to FALSE.  scale_func_name is passed on unchanged, so it must
 * already start with an underscore. */
#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t,	\
			      repeat_mode)							\
    static force_inline void									\
    scanline_func##scale_func_name##_wrapper (							\
		    const uint8_t    *mask,							\
		    dst_type_t       *dst,							\
		    const src_type_t *src,							\
		    int32_t          w,								\
		    pixman_fixed_t   vx,							\
		    pixman_fixed_t   unit_x,							\
		    pixman_fixed_t   max_vx,							\
		    pixman_bool_t    fully_transparent_src)					\
    {												\
	scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src);			\
    }												\
    FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper,	\
			       src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
   1.617 +
/* Mask-less main loop for an existing scanline function: pastes an
 * underscore in front of scale_func_name and delegates to
 * FAST_NEAREST_MAINLOOP_NOMASK, which yields
 * fast_composite_scaled_nearest_<scale_func_name>. */
#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t,		\
			      repeat_mode)							\
	FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t,		\
			      dst_type_t, repeat_mode)
   1.622 +
/* Generate a complete mask-less nearest scaler in one go:
 *  - the scanline function scaled_nearest_scanline_<scale_func_name>_<OP>
 *    (via FAST_NEAREST_SCANLINE), and
 *  - the composite function
 *    fast_composite_scaled_nearest_<scale_func_name>_<OP>
 *    (via FAST_NEAREST_MAINLOOP_NOMASK).
 */
#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT,				\
		     src_type_t, dst_type_t, OP, repeat_mode)				\
    FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,	\
			  SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t,		\
			  OP, repeat_mode)						\
    FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP,			\
			  scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,	\
			  src_type_t, dst_type_t, repeat_mode)
   1.631 +
   1.632 +
/* Source image flags shared by every SIMPLE_NEAREST_*_FAST_PATH_* entry
 * below; each entry ORs in the flags specific to its repeat mode. */
#define SCALED_NEAREST_FLAGS						\
    (FAST_PATH_SCALE_TRANSFORM	|					\
     FAST_PATH_NO_ALPHA_MAP	|					\
     FAST_PATH_NEAREST_FILTER	|					\
     FAST_PATH_NO_ACCESSORS	|					\
     FAST_PATH_NARROW_FORMAT)
   1.639 +
/*
 * Mask-less fast path entries.
 *
 * Each macro expands to a brace-enclosed initializer (presumably for one
 * entry of a fast path table; the struct type is not visible here) that
 * lists, in order: the operator PIXMAN_OP_<op>, the source format
 * PIXMAN_<s>, the source flags, the mask format and mask flags
 * (PIXMAN_null, 0: no mask), the destination format PIXMAN_<d> with
 * FAST_PATH_STD_DEST_FLAGS, and the composite function
 * fast_composite_scaled_nearest_<func>_<repeat>_<op>.
 */

/* NORMAL repeat; requires a positive x step. */
#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func)			\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS		|				\
	 FAST_PATH_NORMAL_REPEAT	|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_null, 0,							\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,	\
    }

/* PAD repeat; requires a positive x step. */
#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func)			\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS		|				\
	 FAST_PATH_PAD_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_null, 0,							\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,	\
    }

/* NONE repeat; requires a positive x step. */
#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func)			\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS		|				\
	 FAST_PATH_NONE_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_null, 0,							\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,	\
    }

/* No repeat handling: only matches when the samples cover the clip. */
#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func)			\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,    \
	PIXMAN_null, 0,							\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
    }
   1.681 +
/*
 * Fast path entries with an a8 mask.
 *
 * Each macro expands to a brace-enclosed initializer (presumably for one
 * entry of a fast path table; the struct type is not visible here) that
 * lists, in order: the operator PIXMAN_OP_<op>, the source format
 * PIXMAN_<s>, the source flags, the mask format PIXMAN_a8 with
 * MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), the destination format
 * PIXMAN_<d> with FAST_PATH_STD_DEST_FLAGS, and the composite function
 * fast_composite_scaled_nearest_<func>_<repeat>_<op>.
 */

/* NORMAL repeat; requires a positive x step. */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS		|				\
	 FAST_PATH_NORMAL_REPEAT	|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,	\
    }

/* PAD repeat; requires a positive x step. */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS		|				\
	 FAST_PATH_PAD_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,	\
    }

/* NONE repeat; requires a positive x step. */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS		|				\
	 FAST_PATH_NONE_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,	\
    }

/* No repeat handling: only matches when the samples cover the clip. */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,	\
	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
    }
   1.723 +
   1.724 +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
   1.725 +    {   PIXMAN_OP_ ## op,						\
   1.726 +	PIXMAN_ ## s,							\
   1.727 +	(SCALED_NEAREST_FLAGS		|				\
   1.728 +	 FAST_PATH_NORMAL_REPEAT	|				\
   1.729 +	 FAST_PATH_X_UNIT_POSITIVE),					\
   1.730 +	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
   1.731 +	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   1.732 +	fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,	\
   1.733 +    }
   1.734 +
   1.735 +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)		\
   1.736 +    {   PIXMAN_OP_ ## op,						\
   1.737 +	PIXMAN_ ## s,							\
   1.738 +	(SCALED_NEAREST_FLAGS		|				\
   1.739 +	 FAST_PATH_PAD_REPEAT		|				\
   1.740 +	 FAST_PATH_X_UNIT_POSITIVE),					\
   1.741 +	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
   1.742 +	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   1.743 +	fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,	\
   1.744 +    }
   1.745 +
   1.746 +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)		\
   1.747 +    {   PIXMAN_OP_ ## op,						\
   1.748 +	PIXMAN_ ## s,							\
   1.749 +	(SCALED_NEAREST_FLAGS		|				\
   1.750 +	 FAST_PATH_NONE_REPEAT		|				\
   1.751 +	 FAST_PATH_X_UNIT_POSITIVE),					\
   1.752 +	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
   1.753 +	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   1.754 +	fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,	\
   1.755 +    }
   1.756 +
   1.757 +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)		\
   1.758 +    {   PIXMAN_OP_ ## op,						\
   1.759 +	PIXMAN_ ## s,							\
   1.760 +	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,	\
   1.761 +	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
   1.762 +	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   1.763 +	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
   1.764 +    }
   1.765 +
   1.766 +/* Prefer the use of 'cover' variant, because it is faster */
   1.767 +#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func)				\
   1.768 +    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),			\
   1.769 +    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func),			\
   1.770 +    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func),				\
   1.771 +    SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
   1.772 +
   1.773 +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func)			\
   1.774 +    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func),		\
   1.775 +    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func),		\
   1.776 +    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
   1.777 +
   1.778 +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func)		\
   1.779 +    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),		\
   1.780 +    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),		\
   1.781 +    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
   1.782 +
   1.783 +/*****************************************************************************/
   1.784 +
   1.785 +/*
   1.786 + * Identify 5 zones in each scanline for bilinear scaling, depending on
   1.787 + * whether the 2 pixels to be interpolated are fetched from the image itself,
   1.788 + * from the padding area around it, or from both the image and the padding area.
   1.789 + */
   1.790 +static force_inline void
   1.791 +bilinear_pad_repeat_get_scanline_bounds (int32_t         source_image_width,
   1.792 +					 pixman_fixed_t  vx,
   1.793 +					 pixman_fixed_t  unit_x,
   1.794 +					 int32_t *       left_pad,
   1.795 +					 int32_t *       left_tz,
   1.796 +					 int32_t *       width,
   1.797 +					 int32_t *       right_tz,
   1.798 +					 int32_t *       right_pad)
   1.799 +{
   1.800 +	int width1 = *width, left_pad1, right_pad1; /* bounds for the first pixel of each pair (vx) */
   1.801 +	int width2 = *width, left_pad2, right_pad2; /* bounds for the second pixel (vx + pixman_fixed_1) */
   1.802 +
   1.803 +	pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
   1.804 +					&width1, &left_pad1, &right_pad1);
   1.805 +	pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
   1.806 +					unit_x, &width2, &left_pad2, &right_pad2);
   1.807 +
   1.808 +	*left_pad = left_pad2;
   1.809 +	*left_tz = left_pad1 - left_pad2; /* left 'transition zone': difference between the two left bounds */
   1.810 +	*right_tz = right_pad2 - right_pad1; /* right 'transition zone': difference between the two right bounds */
   1.811 +	*right_pad = right_pad1;
   1.812 +	*width -= *left_pad + *left_tz + *right_tz + *right_pad; /* in/out: what remains after the four edge zones */
   1.813 +}
   1.814 +
   1.815 +/*
   1.816 + * Main loop template for single pass bilinear scaling. It needs to be
   1.817 + * provided with 'scanline_func' which should do the compositing operation.
   1.818 + * The needed function has the following prototype:
   1.819 + *
   1.820 + *	scanline_func (dst_type_t *       dst,
   1.821 + *		       const mask_type_t *mask,
   1.822 + *		       const src_type_t * src_top,
   1.823 + *		       const src_type_t * src_bottom,
   1.824 + *		       int32_t            width,
   1.825 + *		       int                weight_top,
   1.826 + *		       int                weight_bottom,
   1.827 + *		       pixman_fixed_t     vx,
   1.828 + *		       pixman_fixed_t     unit_x,
   1.829 + *		       pixman_fixed_t     max_vx,
   1.830 + *		       pixman_bool_t      zero_src)
   1.831 + *
   1.832 + * Where:
   1.833 + *  dst                 - destination scanline buffer for storing results
   1.834 + *  mask                - mask buffer (or single value for solid mask)
   1.835 + *  src_top, src_bottom - two source scanlines
   1.836 + *  width               - number of pixels to process
   1.837 + *  weight_top          - weight of the top row for interpolation
   1.838 + *  weight_bottom       - weight of the bottom row for interpolation
   1.839 + *  vx                  - initial position for fetching the first pair of
   1.840 + *                        pixels from the source buffer
   1.841 + *  unit_x              - position increment needed to move to the next pair
   1.842 + *                        of pixels
   1.843 + *  max_vx              - image size as a fixed point value, can be used for
   1.844 + *                        implementing NORMAL repeat (when it is supported)
   1.845 + *  zero_src            - boolean hint variable, which is set to TRUE when
   1.846 + *                        all source pixels are fetched from zero padding
   1.847 + *                        zone for NONE repeat
   1.848 + *
   1.849 + * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to
   1.850 + *       BILINEAR_INTERPOLATION_RANGE, but sometimes it may be less than that
   1.851 + *       for NONE repeat when handling fuzzy antialiased top or bottom image
   1.852 + *       edges. Also, both top and bottom weight variables are guaranteed to
   1.853 + *       have a value that is less than BILINEAR_INTERPOLATION_RANGE.
   1.854 + *       For example, the weights can fit into unsigned byte or be used
   1.855 + *       with 8-bit SIMD multiplication instructions for 8-bit interpolation
   1.856 + *       precision.
   1.857 + */
   1.858 +
   1.859 +/* Replace a single "scanline_func" with "fetch_func" & "op_func" to allow optional
   1.860 + * two-stage processing (bilinear fetch to a temp buffer, followed by an unscaled
   1.861 + * combine). "op_func" may be NULL, in which case "fetch_func" writes straight to dst.
   1.862 + * This is ugly and gcc issues some warnings, but it works.
   1.863 + *
   1.864 + * A word of advice: clang has much better error reporting than gcc for deeply nested macros.
   1.865 + */
   1.866 +
   1.867 +#define	scanline_func(dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,            \
   1.868 +                      scanline_buf, mask, src_top, src_bottom, width,                           \
   1.869 +                      weight_top, weight_bottom, vx, unit_x, max_vx, zero_src)                  \
   1.870 + do {                                                                                           \
   1.871 +		if (op_func != NULL)								\
   1.872 +		{										\
   1.873 +		    fetch_func ((void *)scanline_buf, (mask), (src_top), (src_bottom), (width), \
   1.874 +                        (weight_top), (weight_bottom), (vx), (unit_x), (max_vx), (zero_src));   \
   1.875 +		    ((void (*)(dst_type_t *, const mask_type_t *, const src_type_t *, int)) op_func)\
   1.876 +			((dst), (mask), (src_type_t *)scanline_buf, (width));			\
   1.877 +		}										\
   1.878 +		else										\
   1.879 +		{										\
   1.880 +		    fetch_func ((void*)(dst), (mask), (src_top), (src_bottom), (width), (weight_top),  \
   1.881 +                                (weight_bottom), (vx), (unit_x), (max_vx), (zero_src));         \
   1.882 +		}                                                                               \
   1.883 +  } while (0)
   1.884 +
   1.885 +
   1.886 +#define SCANLINE_BUFFER_LENGTH 3072
   1.887 +/* Generates fast_composite_scaled_bilinear<scale_func_name>: the per-scanline bilinear driver for one repeat mode. */
   1.888 +#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, fetch_func, op_func, src_type_t,		\
   1.889 +				  mask_type_t, dst_type_t, repeat_mode, flags)			\
   1.890 +static void											\
   1.891 +fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,		\
   1.892 +						   pixman_composite_info_t *info)		\
   1.893 +{												\
   1.894 +    PIXMAN_COMPOSITE_ARGS (info);								\
   1.895 +    dst_type_t *dst_line;									\
   1.896 +    mask_type_t *mask_line;									\
   1.897 +    src_type_t *src_first_line;									\
   1.898 +    int       y1, y2;										\
   1.899 +    pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */		\
   1.900 +    pixman_vector_t v;										\
   1.901 +    pixman_fixed_t vx, vy;									\
   1.902 +    pixman_fixed_t unit_x, unit_y;								\
   1.903 +    int32_t left_pad, left_tz, right_tz, right_pad;						\
   1.904 +												\
   1.905 +    dst_type_t *dst;										\
   1.906 +    mask_type_t solid_mask;									\
   1.907 +    const mask_type_t *mask = &solid_mask;							\
   1.908 +    int src_stride, mask_stride, dst_stride;							\
   1.909 +												\
   1.910 +    int src_width;										\
   1.911 +    pixman_fixed_t src_width_fixed;								\
   1.912 +    int max_x;											\
   1.913 +    pixman_bool_t need_src_extension;								\
   1.914 +    const int32_t full_width = width; /* whole span; 'width' is trimmed for PAD/NONE below */	\
   1.915 +    uint64_t stack_scanline_buffer[SCANLINE_BUFFER_LENGTH];                                     \
   1.916 +    uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer;                               \
   1.917 +												\
   1.918 +    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);	\
   1.919 +    if (flags & FLAG_HAVE_SOLID_MASK)								\
   1.920 +    {												\
   1.921 +	solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);	\
   1.922 +	mask_stride = 0;									\
   1.923 +    }												\
   1.924 +    else if (flags & FLAG_HAVE_NON_SOLID_MASK)							\
   1.925 +    {												\
   1.926 +	PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,				\
   1.927 +			       mask_stride, mask_line, 1);					\
   1.928 +    }												\
   1.929 +												\
   1.930 +    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be			\
   1.931 +     * transformed from destination space to source space */					\
   1.932 +    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);		\
   1.933 +												\
   1.934 +    /* reference point is the center of the pixel */						\
   1.935 +    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;				\
   1.936 +    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;				\
   1.937 +    v.vector[2] = pixman_fixed_1;								\
   1.938 +												\
   1.939 +    if (!pixman_transform_point_3d (src_image->common.transform, &v))				\
   1.940 +	return;											\
   1.941 +												\
   1.942 +    unit_x = src_image->common.transform->matrix[0][0];						\
   1.943 +    unit_y = src_image->common.transform->matrix[1][1];						\
   1.944 +												\
   1.945 +    v.vector[0] -= pixman_fixed_1 / 2;								\
   1.946 +    v.vector[1] -= pixman_fixed_1 / 2;								\
   1.947 +												\
   1.948 +    vy = v.vector[1];										\
   1.949 +												\
   1.950 +    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||					\
   1.951 +	PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)					\
   1.952 +    {												\
   1.953 +	bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,	\
   1.954 +					&left_pad, &left_tz, &width, &right_tz, &right_pad);	\
   1.955 +	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
   1.956 +	{											\
   1.957 +	    /* PAD repeat does not need special handling for 'transition zones' and */		\
   1.958 +	    /* they can be combined with 'padding zones' safely */				\
   1.959 +	    left_pad += left_tz;								\
   1.960 +	    right_pad += right_tz;								\
   1.961 +	    left_tz = right_tz = 0;								\
   1.962 +	}											\
   1.963 +	v.vector[0] += left_pad * unit_x;							\
   1.964 +    }												\
   1.965 +												\
   1.966 +    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)					\
   1.967 +    {												\
   1.968 +	vx = v.vector[0];									\
   1.969 +	repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width));		\
   1.970 +	max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1;			\
   1.971 +												\
   1.972 +	if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH)					\
   1.973 +	{											\
   1.974 +	    src_width = 0;									\
   1.975 +												\
   1.976 +	    while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x)			\
   1.977 +		src_width += src_image->bits.width;						\
   1.978 +												\
   1.979 +	    need_src_extension = TRUE;								\
   1.980 +	}											\
   1.981 +	else											\
   1.982 +	{											\
   1.983 +	    src_width = src_image->bits.width;							\
   1.984 +	    need_src_extension = FALSE;								\
   1.985 +	}											\
   1.986 +												\
   1.987 +	src_width_fixed = pixman_int_to_fixed (src_width);					\
   1.988 +    }												\
   1.989 +    /* The pad/transition-zone calls go through the buffer too, so size it by the full span. */ \
   1.990 +    if (op_func != NULL && full_width * sizeof(src_type_t) > sizeof(stack_scanline_buffer))     \
   1.991 +    {                                                                                           \
   1.992 +	scanline_buffer = pixman_malloc_ab (full_width, sizeof(src_type_t));                    \
   1.993 +                                                                                                \
   1.994 +	if (!scanline_buffer)                                                                   \
   1.995 +	    return;                                                                             \
   1.996 +    }                                                                                           \
   1.997 +												\
   1.998 +    while (--height >= 0)									\
   1.999 +    {												\
  1.1000 +	int weight1, weight2;									\
  1.1001 +	dst = dst_line;										\
  1.1002 +	dst_line += dst_stride;									\
  1.1003 +	vx = v.vector[0];									\
  1.1004 +	if (flags & FLAG_HAVE_NON_SOLID_MASK)							\
  1.1005 +	{											\
  1.1006 +	    mask = mask_line;									\
  1.1007 +	    mask_line += mask_stride;								\
  1.1008 +	}											\
  1.1009 +												\
  1.1010 +	y1 = pixman_fixed_to_int (vy);								\
  1.1011 +	weight2 = pixman_fixed_to_bilinear_weight (vy);						\
  1.1012 +	if (weight2)										\
  1.1013 +	{											\
  1.1014 +	    /* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */	\
  1.1015 +	    y2 = y1 + 1;									\
  1.1016 +	    weight1 = BILINEAR_INTERPOLATION_RANGE - weight2;					\
  1.1017 +	}											\
  1.1018 +	else											\
  1.1019 +	{											\
  1.1020 +	    /* set both top and bottom row to the same scanline and tweak weights */		\
  1.1021 +	    y2 = y1;										\
  1.1022 +	    weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2;				\
  1.1023 +	}											\
  1.1024 +	vy += unit_y;										\
  1.1025 +	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
  1.1026 +	{											\
  1.1027 +	    src_type_t *src1, *src2;								\
  1.1028 +	    src_type_t buf1[2];									\
  1.1029 +	    src_type_t buf2[2];									\
  1.1030 +	    repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);				\
  1.1031 +	    repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);				\
  1.1032 +	    src1 = src_first_line + src_stride * y1;						\
  1.1033 +	    src2 = src_first_line + src_stride * y2;						\
  1.1034 +												\
  1.1035 +	    if (left_pad > 0)									\
  1.1036 +	    {											\
  1.1037 +		buf1[0] = buf1[1] = src1[0];							\
  1.1038 +		buf2[0] = buf2[1] = src2[0];							\
  1.1039 +		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
  1.1040 +			       scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2,   \
  1.1041 +                               0, 0, 0, FALSE);	                                                \
  1.1042 +		dst += left_pad;								\
  1.1043 +		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
  1.1044 +		    mask += left_pad;								\
  1.1045 +	    }											\
  1.1046 +	    if (width > 0)									\
  1.1047 +	    {											\
  1.1048 +		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
  1.1049 +			       scanline_buffer, mask, src1, src2, width, weight1, weight2,      \
  1.1050 +                               vx, unit_x, 0, FALSE);                                           \
  1.1051 +		dst += width;									\
  1.1052 +		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
  1.1053 +		    mask += width;								\
  1.1054 +	    }											\
  1.1055 +	    if (right_pad > 0)									\
  1.1056 +	    {											\
  1.1057 +		buf1[0] = buf1[1] = src1[src_image->bits.width - 1];				\
  1.1058 +		buf2[0] = buf2[1] = src2[src_image->bits.width - 1];				\
  1.1059 +		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
  1.1060 +			       scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2,  \
  1.1061 +                               0, 0, 0, FALSE);                                                 \
  1.1062 +	    }											\
  1.1063 +	}											\
  1.1064 +	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)				\
  1.1065 +	{											\
  1.1066 +	    src_type_t *src1, *src2;								\
  1.1067 +	    src_type_t buf1[2];									\
  1.1068 +	    src_type_t buf2[2];									\
  1.1069 +	    /* handle top/bottom zero padding by just setting weights to 0 if needed */		\
  1.1070 +	    if (y1 < 0)										\
  1.1071 +	    {											\
  1.1072 +		weight1 = 0;									\
  1.1073 +		y1 = 0;										\
  1.1074 +	    }											\
  1.1075 +	    if (y1 >= src_image->bits.height)							\
  1.1076 +	    {											\
  1.1077 +		weight1 = 0;									\
  1.1078 +		y1 = src_image->bits.height - 1;						\
  1.1079 +	    }											\
  1.1080 +	    if (y2 < 0)										\
  1.1081 +	    {											\
  1.1082 +		weight2 = 0;									\
  1.1083 +		y2 = 0;										\
  1.1084 +	    }											\
  1.1085 +	    if (y2 >= src_image->bits.height)							\
  1.1086 +	    {											\
  1.1087 +		weight2 = 0;									\
  1.1088 +		y2 = src_image->bits.height - 1;						\
  1.1089 +	    }											\
  1.1090 +	    src1 = src_first_line + src_stride * y1;						\
  1.1091 +	    src2 = src_first_line + src_stride * y2;						\
  1.1092 +												\
  1.1093 +	    if (left_pad > 0)									\
  1.1094 +	    {											\
  1.1095 +		buf1[0] = buf1[1] = 0;								\
  1.1096 +		buf2[0] = buf2[1] = 0;								\
  1.1097 +		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
  1.1098 +			       scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2,   \
  1.1099 +                               0, 0, 0, TRUE);	                                                \
  1.1100 +		dst += left_pad;								\
  1.1101 +		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
  1.1102 +		    mask += left_pad;								\
  1.1103 +	    }											\
  1.1104 +	    if (left_tz > 0)									\
  1.1105 +	    {											\
  1.1106 +		buf1[0] = 0;									\
  1.1107 +		buf1[1] = src1[0];								\
  1.1108 +		buf2[0] = 0;									\
  1.1109 +		buf2[1] = src2[0];								\
  1.1110 +		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
  1.1111 +			       scanline_buffer, mask, buf1, buf2, left_tz, weight1, weight2,	\
  1.1112 +			       pixman_fixed_frac (vx), unit_x, 0, FALSE);			\
  1.1113 +		dst += left_tz;									\
  1.1114 +		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
  1.1115 +		    mask += left_tz;								\
  1.1116 +		vx += left_tz * unit_x;								\
  1.1117 +	    }											\
  1.1118 +	    if (width > 0)									\
  1.1119 +	    {											\
  1.1120 +		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
  1.1121 +			       scanline_buffer, mask, src1, src2, width, weight1, weight2,      \
  1.1122 +                               vx, unit_x, 0, FALSE);                                           \
  1.1123 +		dst += width;									\
  1.1124 +		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
  1.1125 +		    mask += width;								\
  1.1126 +		vx += width * unit_x;								\
  1.1127 +	    }											\
  1.1128 +	    if (right_tz > 0)									\
  1.1129 +	    {											\
  1.1130 +		buf1[0] = src1[src_image->bits.width - 1];					\
  1.1131 +		buf1[1] = 0;									\
  1.1132 +		buf2[0] = src2[src_image->bits.width - 1];					\
  1.1133 +		buf2[1] = 0;									\
  1.1134 +		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
  1.1135 +			       scanline_buffer, mask, buf1, buf2, right_tz, weight1, weight2,   \
  1.1136 +			       pixman_fixed_frac (vx), unit_x, 0, FALSE);			\
  1.1137 +		dst += right_tz;								\
  1.1138 +		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
  1.1139 +		    mask += right_tz;								\
  1.1140 +	    }											\
  1.1141 +	    if (right_pad > 0)									\
  1.1142 +	    {											\
  1.1143 +		buf1[0] = buf1[1] = 0;								\
  1.1144 +		buf2[0] = buf2[1] = 0;								\
  1.1145 +		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
  1.1146 +			       scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2,  \
  1.1147 +                               0, 0, 0, TRUE);	                                                \
  1.1148 +	    }											\
  1.1149 +	}											\
  1.1150 +	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
  1.1151 +	{											\
  1.1152 +	    int32_t	    num_pixels;								\
  1.1153 +	    int32_t	    width_remain;							\
  1.1154 +	    src_type_t *    src_line_top;							\
  1.1155 +	    src_type_t *    src_line_bottom;							\
  1.1156 +	    src_type_t	    buf1[2];								\
  1.1157 +	    src_type_t	    buf2[2];								\
  1.1158 +	    src_type_t	    extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2];			\
  1.1159 +	    src_type_t	    extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2];			\
  1.1160 +	    int		    i, j;								\
  1.1161 +												\
  1.1162 +	    repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);				\
  1.1163 +	    repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);				\
  1.1164 +	    src_line_top = src_first_line + src_stride * y1;					\
  1.1165 +	    src_line_bottom = src_first_line + src_stride * y2;					\
  1.1166 +												\
  1.1167 +	    if (need_src_extension)								\
  1.1168 +	    {											\
  1.1169 +		for (i=0; i<src_width;)								\
  1.1170 +		{										\
  1.1171 +		    for (j=0; j<src_image->bits.width; j++, i++)				\
  1.1172 +		    {										\
  1.1173 +			extended_src_line0[i] = src_line_top[j];				\
  1.1174 +			extended_src_line1[i] = src_line_bottom[j];				\
  1.1175 +		    }										\
  1.1176 +		}										\
  1.1177 +												\
  1.1178 +		src_line_top = &extended_src_line0[0];						\
  1.1179 +		src_line_bottom = &extended_src_line1[0];					\
  1.1180 +	    }											\
  1.1181 +												\
  1.1182 +	    /* Top & Bottom wrap around buffer */						\
  1.1183 +	    buf1[0] = src_line_top[src_width - 1];						\
  1.1184 +	    buf1[1] = src_line_top[0];								\
  1.1185 +	    buf2[0] = src_line_bottom[src_width - 1];						\
  1.1186 +	    buf2[1] = src_line_bottom[0];							\
  1.1187 +												\
  1.1188 +	    width_remain = width;								\
  1.1189 +												\
  1.1190 +	    while (width_remain > 0)								\
  1.1191 +	    {											\
  1.1192 +		/* We use src_width_fixed because it can make vx in original source range */	\
  1.1193 +		repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);				\
  1.1194 +												\
  1.1195 +		/* Wrap around part */								\
  1.1196 +		if (pixman_fixed_to_int (vx) == src_width - 1)					\
  1.1197 +		{										\
  1.1198 +		    /* for positive unit_x							\
  1.1199 +		     * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed		\
  1.1200 +		     *										\
  1.1201 +		     * vx is in range [0, src_width_fixed - pixman_fixed_e]			\
  1.1202 +		     * So we are safe from overflow.						\
  1.1203 +		     */										\
  1.1204 +		    num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1;	\
  1.1205 +												\
  1.1206 +		    if (num_pixels > width_remain)						\
  1.1207 +			num_pixels = width_remain;						\
  1.1208 +												\
  1.1209 +		    scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func,    \
  1.1210 +                                   dst, scanline_buffer, mask, buf1, buf2, num_pixels,          \
  1.1211 +				   weight1, weight2, pixman_fixed_frac(vx),			\
  1.1212 +				   unit_x, src_width_fixed, FALSE);				\
  1.1213 +												\
  1.1214 +		    width_remain -= num_pixels;							\
  1.1215 +		    vx += num_pixels * unit_x;							\
  1.1216 +		    dst += num_pixels;								\
  1.1217 +												\
  1.1218 +		    if (flags & FLAG_HAVE_NON_SOLID_MASK)					\
  1.1219 +			mask += num_pixels;							\
  1.1220 +												\
  1.1221 +		    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);			\
  1.1222 +		}										\
  1.1223 +												\
  1.1224 +		/* Normal scanline composite */							\
  1.1225 +		if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0)		\
  1.1226 +		{										\
  1.1227 +		    /* for positive unit_x							\
  1.1228 +		     * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1)	\
  1.1229 +		     *										\
  1.1230 +		     * vx is in range [0, src_width_fixed - pixman_fixed_e]			\
  1.1231 +		     * So we are safe from overflow here.					\
  1.1232 +		     */										\
  1.1233 +		    num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e)	\
  1.1234 +				  / unit_x) + 1;						\
  1.1235 +												\
  1.1236 +		    if (num_pixels > width_remain)						\
  1.1237 +			num_pixels = width_remain;						\
  1.1238 +												\
  1.1239 +		    scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func,    \
  1.1240 +                                   dst, scanline_buffer, mask, src_line_top, src_line_bottom,   \
  1.1241 +                                   num_pixels, weight1, weight2, vx, unit_x, src_width_fixed,   \
  1.1242 +                                   FALSE);	                                                \
  1.1243 +												\
  1.1244 +		    width_remain -= num_pixels;							\
  1.1245 +		    vx += num_pixels * unit_x;							\
  1.1246 +		    dst += num_pixels;								\
  1.1247 +												\
  1.1248 +		    if (flags & FLAG_HAVE_NON_SOLID_MASK)					\
  1.1249 +		        mask += num_pixels;							\
  1.1250 +		}										\
  1.1251 +	    }											\
  1.1252 +	}											\
  1.1253 +	else											\
  1.1254 +	{											\
  1.1255 +	    scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,       \
  1.1256 +                           scanline_buffer, mask,                                               \
  1.1257 +                           src_first_line + src_stride * y1,					\
  1.1258 +			   src_first_line + src_stride * y2, width,				\
  1.1259 +			   weight1, weight2, vx, unit_x, max_vx, FALSE);			\
  1.1260 +	}											\
  1.1261 +    }												\
  1.1262 +    if (scanline_buffer != (uint8_t *) stack_scanline_buffer)                                   \
  1.1263 +	free (scanline_buffer);                                                                 \
  1.1264 +}
  1.1265 +
  1.1266 +/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
  1.1267 +#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, fetch_func, op_func, src_type_t, mask_type_t,\
  1.1268 +				  dst_type_t, repeat_mode, flags)				\
  1.1269 +	FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, fetch_func, op_func, src_type_t, mask_type_t,\
  1.1270 +				  dst_type_t, repeat_mode, flags)
  1.1271 +
  1.1272 +#define SCALED_BILINEAR_FLAGS						\
  1.1273 +    (FAST_PATH_SCALE_TRANSFORM	|					\
  1.1274 +     FAST_PATH_NO_ALPHA_MAP	|					\
  1.1275 +     FAST_PATH_BILINEAR_FILTER	|					\
  1.1276 +     FAST_PATH_NO_ACCESSORS	|					\
  1.1277 +     FAST_PATH_NARROW_FORMAT)
  1.1278 +
  1.1279 +#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func)			\
  1.1280 +    {   PIXMAN_OP_ ## op,						\
  1.1281 +	PIXMAN_ ## s,							\
  1.1282 +	(SCALED_BILINEAR_FLAGS		|				\
  1.1283 +	 FAST_PATH_PAD_REPEAT		|				\
  1.1284 +	 FAST_PATH_X_UNIT_POSITIVE),					\
  1.1285 +	PIXMAN_null, 0,							\
  1.1286 +	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1.1287 +	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
  1.1288 +    }
  1.1289 +
  1.1290 +#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func)			\
  1.1291 +    {   PIXMAN_OP_ ## op,						\
  1.1292 +	PIXMAN_ ## s,							\
  1.1293 +	(SCALED_BILINEAR_FLAGS		|				\
  1.1294 +	 FAST_PATH_NONE_REPEAT		|				\
  1.1295 +	 FAST_PATH_X_UNIT_POSITIVE),					\
  1.1296 +	PIXMAN_null, 0,							\
  1.1297 +	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1.1298 +	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
  1.1299 +    }
  1.1300 +
  1.1301 +#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func)			\
  1.1302 +    {   PIXMAN_OP_ ## op,						\
  1.1303 +	PIXMAN_ ## s,							\
  1.1304 +	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,	\
  1.1305 +	PIXMAN_null, 0,							\
  1.1306 +	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1.1307 +	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
  1.1308 +    }
  1.1309 +
  1.1310 +#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func)			\
  1.1311 +    {   PIXMAN_OP_ ## op,						\
  1.1312 +	PIXMAN_ ## s,							\
  1.1313 +	(SCALED_BILINEAR_FLAGS		|				\
  1.1314 +	 FAST_PATH_NORMAL_REPEAT	|				\
  1.1315 +	 FAST_PATH_X_UNIT_POSITIVE),					\
  1.1316 +	PIXMAN_null, 0,							\
  1.1317 +	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1.1318 +	fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,	\
  1.1319 +    }
  1.1320 +
  1.1321 +#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func)		\
  1.1322 +    {   PIXMAN_OP_ ## op,						\
  1.1323 +	PIXMAN_ ## s,							\
  1.1324 +	(SCALED_BILINEAR_FLAGS		|				\
  1.1325 +	 FAST_PATH_PAD_REPEAT		|				\
  1.1326 +	 FAST_PATH_X_UNIT_POSITIVE),					\
  1.1327 +	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
  1.1328 +	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1.1329 +	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
  1.1330 +    }
  1.1331 +
  /*
   * Brace initializer for one bilinear-scaling entry with a PIXMAN_a8 mask
   * whose source flag set includes FAST_PATH_NONE_REPEAT.  (Presumably an
   * element of a fast-path table; the struct type is not visible here --
   * confirm.)
   *
   *   op   - pasted into PIXMAN_OP_<op> and into the handler name
   *   s    - pasted into PIXMAN_<s>; followed by the flag set
   *          SCALED_BILINEAR_FLAGS | FAST_PATH_NONE_REPEAT |
   *          FAST_PATH_X_UNIT_POSITIVE
   *   d    - pasted into PIXMAN_<d>, paired with FAST_PATH_STD_DEST_FLAGS
   *   func - pasted into the handler name
   *
   * The mask slots hold PIXMAN_a8 and MASK_FLAGS (a8,
   * FAST_PATH_UNIFIED_ALPHA); MASK_FLAGS is defined outside this chunk.
   * The handler referenced is fast_composite_scaled_bilinear_<func>_none_<op>.
   */
  1.1332 +#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func)		\
  1.1333 +    {   PIXMAN_OP_ ## op,						\
  1.1334 +	PIXMAN_ ## s,							\
  1.1335 +	(SCALED_BILINEAR_FLAGS		|				\
  1.1336 +	 FAST_PATH_NONE_REPEAT		|				\
  1.1337 +	 FAST_PATH_X_UNIT_POSITIVE),					\
  1.1338 +	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
  1.1339 +	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1.1340 +	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
  1.1341 +    }
  1.1342 +
  /*
   * Brace initializer for one bilinear-scaling entry with a PIXMAN_a8 mask,
   * 'cover' variant.  (Presumably an element of a fast-path table; the
   * struct type is not visible here -- confirm.)
   *
   *   op   - pasted into PIXMAN_OP_<op> and into the handler name
   *   s    - pasted into PIXMAN_<s>; followed by the flag set
   *          SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR
   *   d    - pasted into PIXMAN_<d>, paired with FAST_PATH_STD_DEST_FLAGS
   *   func - pasted into the handler name
   *
   * Unlike the NONE / PAD / NORMAL variants, this flag set names no repeat
   * flag and does not include FAST_PATH_X_UNIT_POSITIVE.  The mask slots
   * hold PIXMAN_a8 and MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA).  The
   * handler referenced is fast_composite_scaled_bilinear_<func>_cover_<op>.
   */
  1.1343 +#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func)		\
  1.1344 +    {   PIXMAN_OP_ ## op,						\
  1.1345 +	PIXMAN_ ## s,							\
  1.1346 +	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,	\
  1.1347 +	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
  1.1348 +	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1.1349 +	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
  1.1350 +    }
  1.1351 +
  /*
   * Brace initializer for one bilinear-scaling entry with a PIXMAN_a8 mask
   * whose source flag set includes FAST_PATH_NORMAL_REPEAT.  (Presumably an
   * element of a fast-path table; the struct type is not visible here --
   * confirm.)
   *
   *   op   - pasted into PIXMAN_OP_<op> and into the handler name
   *   s    - pasted into PIXMAN_<s>; followed by the flag set
   *          SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT |
   *          FAST_PATH_X_UNIT_POSITIVE
   *   d    - pasted into PIXMAN_<d>, paired with FAST_PATH_STD_DEST_FLAGS
   *   func - pasted into the handler name
   *
   * The mask slots hold PIXMAN_a8 and MASK_FLAGS (a8,
   * FAST_PATH_UNIFIED_ALPHA); MASK_FLAGS is defined outside this chunk.
   * The handler referenced is
   * fast_composite_scaled_bilinear_<func>_normal_<op>.
   */
  1.1352 +#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
  1.1353 +    {   PIXMAN_OP_ ## op,						\
  1.1354 +	PIXMAN_ ## s,							\
  1.1355 +	(SCALED_BILINEAR_FLAGS		|				\
  1.1356 +	 FAST_PATH_NORMAL_REPEAT	|				\
  1.1357 +	 FAST_PATH_X_UNIT_POSITIVE),					\
  1.1358 +	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
  1.1359 +	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1.1360 +	fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,	\
  1.1361 +    }
  1.1362 +
  /*
   * Brace initializer for one bilinear-scaling entry with a PIXMAN_solid
   * mask whose source flag set includes FAST_PATH_PAD_REPEAT.  (Presumably
   * an element of a fast-path table; the struct type is not visible here --
   * confirm.)
   *
   *   op   - pasted into PIXMAN_OP_<op> and into the handler name
   *   s    - pasted into PIXMAN_<s>; followed by the flag set
   *          SCALED_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT |
   *          FAST_PATH_X_UNIT_POSITIVE
   *   d    - pasted into PIXMAN_<d>, paired with FAST_PATH_STD_DEST_FLAGS
   *   func - pasted into the handler name
   *
   * The mask slots hold PIXMAN_solid and MASK_FLAGS (solid,
   * FAST_PATH_UNIFIED_ALPHA); MASK_FLAGS is defined outside this chunk.
   * The handler referenced is fast_composite_scaled_bilinear_<func>_pad_<op>.
   */
  1.1363 +#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)		\
  1.1364 +    {   PIXMAN_OP_ ## op,						\
  1.1365 +	PIXMAN_ ## s,							\
  1.1366 +	(SCALED_BILINEAR_FLAGS		|				\
  1.1367 +	 FAST_PATH_PAD_REPEAT		|				\
  1.1368 +	 FAST_PATH_X_UNIT_POSITIVE),					\
  1.1369 +	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
  1.1370 +	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1.1371 +	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
  1.1372 +    }
  1.1373 +
  /*
   * Brace initializer for one bilinear-scaling entry with a PIXMAN_solid
   * mask whose source flag set includes FAST_PATH_NONE_REPEAT.  (Presumably
   * an element of a fast-path table; the struct type is not visible here --
   * confirm.)
   *
   *   op   - pasted into PIXMAN_OP_<op> and into the handler name
   *   s    - pasted into PIXMAN_<s>; followed by the flag set
   *          SCALED_BILINEAR_FLAGS | FAST_PATH_NONE_REPEAT |
   *          FAST_PATH_X_UNIT_POSITIVE
   *   d    - pasted into PIXMAN_<d>, paired with FAST_PATH_STD_DEST_FLAGS
   *   func - pasted into the handler name
   *
   * The mask slots hold PIXMAN_solid and MASK_FLAGS (solid,
   * FAST_PATH_UNIFIED_ALPHA); MASK_FLAGS is defined outside this chunk.
   * The handler referenced is fast_composite_scaled_bilinear_<func>_none_<op>.
   */
  1.1374 +#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)		\
  1.1375 +    {   PIXMAN_OP_ ## op,						\
  1.1376 +	PIXMAN_ ## s,							\
  1.1377 +	(SCALED_BILINEAR_FLAGS		|				\
  1.1378 +	 FAST_PATH_NONE_REPEAT		|				\
  1.1379 +	 FAST_PATH_X_UNIT_POSITIVE),					\
  1.1380 +	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
  1.1381 +	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1.1382 +	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
  1.1383 +    }
  1.1384 +
  /*
   * Brace initializer for one bilinear-scaling entry with a PIXMAN_solid
   * mask, 'cover' variant.  (Presumably an element of a fast-path table;
   * the struct type is not visible here -- confirm.)
   *
   *   op   - pasted into PIXMAN_OP_<op> and into the handler name
   *   s    - pasted into PIXMAN_<s>; followed by the flag set
   *          SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR
   *   d    - pasted into PIXMAN_<d>, paired with FAST_PATH_STD_DEST_FLAGS
   *   func - pasted into the handler name
   *
   * Unlike the NONE / PAD / NORMAL variants, this flag set names no repeat
   * flag and does not include FAST_PATH_X_UNIT_POSITIVE.  The mask slots
   * hold PIXMAN_solid and MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA).  The
   * handler referenced is fast_composite_scaled_bilinear_<func>_cover_<op>.
   */
  1.1385 +#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)		\
  1.1386 +    {   PIXMAN_OP_ ## op,						\
  1.1387 +	PIXMAN_ ## s,							\
  1.1388 +	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,	\
  1.1389 +	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
  1.1390 +	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1.1391 +	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
  1.1392 +    }
  1.1393 +
  /*
   * Brace initializer for one bilinear-scaling entry with a PIXMAN_solid
   * mask whose source flag set includes FAST_PATH_NORMAL_REPEAT.
   * (Presumably an element of a fast-path table; the struct type is not
   * visible here -- confirm.)
   *
   *   op   - pasted into PIXMAN_OP_<op> and into the handler name
   *   s    - pasted into PIXMAN_<s>; followed by the flag set
   *          SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT |
   *          FAST_PATH_X_UNIT_POSITIVE
   *   d    - pasted into PIXMAN_<d>, paired with FAST_PATH_STD_DEST_FLAGS
   *   func - pasted into the handler name
   *
   * The mask slots hold PIXMAN_solid and MASK_FLAGS (solid,
   * FAST_PATH_UNIFIED_ALPHA); MASK_FLAGS is defined outside this chunk.
   * The handler referenced is
   * fast_composite_scaled_bilinear_<func>_normal_<op>.
   */
  1.1394 +#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)	\
  1.1395 +    {   PIXMAN_OP_ ## op,						\
  1.1396 +	PIXMAN_ ## s,							\
  1.1397 +	(SCALED_BILINEAR_FLAGS		|				\
  1.1398 +	 FAST_PATH_NORMAL_REPEAT	|				\
  1.1399 +	 FAST_PATH_X_UNIT_POSITIVE),					\
  1.1400 +	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
  1.1401 +	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
  1.1402 +	fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,	\
  1.1403 +    }
  1.1404 +
  1.1405 +/* Prefer the use of 'cover' variant, because it is faster */
  /*
   * Expands to four comma-separated no-mask entries for the same
   * (op, s, d, func), in the order COVER, NONE, PAD, NORMAL.  No comma
   * follows the last entry, so the invoking initializer list supplies its
   * own separator.  All four handlers named by the per-variant macros
   * (fast_composite_scaled_bilinear_<func>_{cover,none,pad,normal}_<op>)
   * must therefore exist wherever this macro is used.
   */
  1.1406 +#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func)				\
  1.1407 +    SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func),			\
  1.1408 +    SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func),			\
  1.1409 +    SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func),			\
  1.1410 +    SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func)
  1.1411 +
  /*
   * Expands to four comma-separated PIXMAN_a8-mask entries for the same
   * (op, s, d, func), in the order COVER, NONE, PAD, NORMAL.  No comma
   * follows the last entry, so the invoking initializer list supplies its
   * own separator.  All four handlers named by the per-variant macros
   * (fast_composite_scaled_bilinear_<func>_{cover,none,pad,normal}_<op>)
   * must therefore exist wherever this macro is used.
   */
  1.1412 +#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func)			\
  1.1413 +    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func),		\
  1.1414 +    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func),		\
  1.1415 +    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func),		\
  1.1416 +    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
  1.1417 +
  /*
   * Expands to four comma-separated PIXMAN_solid-mask entries for the same
   * (op, s, d, func), in the order COVER, NONE, PAD, NORMAL.  No comma
   * follows the last entry, so the invoking initializer list supplies its
   * own separator.  All four handlers named by the per-variant macros
   * (fast_composite_scaled_bilinear_<func>_{cover,none,pad,normal}_<op>)
   * must therefore exist wherever this macro is used.
   */
  1.1418 +#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func)		\
  1.1419 +    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),		\
  1.1420 +    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),		\
  1.1421 +    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func),		\
  1.1422 +    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
  1.1423 +
  1.1424 +#endif

mercurial