gfx/cairo/pixman-8888-over-565.patch

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/cairo/pixman-8888-over-565.patch	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,712 @@
     1.4 +changeset:   96613:3e003f0b8026
     1.5 +tag:         2pass
     1.6 +tag:         qbase
     1.7 +tag:         qtip
     1.8 +tag:         tip
     1.9 +user:        Jeff Muizelaar <jmuizelaar@mozilla.com>
    1.10 +date:        Thu May 17 19:23:53 2012 -0400
    1.11 +summary:     Bug 757878. Add a fast path for 8888_over_565 with NEON. r=bgirard,joe
    1.12 +
    1.13 +diff --git a/gfx/cairo/libpixman/src/pixman-arm-common.h b/gfx/cairo/libpixman/src/pixman-arm-common.h
    1.14 +--- a/gfx/cairo/libpixman/src/pixman-arm-common.h
    1.15 ++++ b/gfx/cairo/libpixman/src/pixman-arm-common.h
    1.16 +@@ -355,26 +355,26 @@ scaled_bilinear_scanline_##cputype##_##n
    1.17 +     if ((flags & SKIP_ZERO_SRC) && zero_src)                                  \
    1.18 + 	return;                                                               \
    1.19 +     pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype (           \
    1.20 +                             dst, src_top, src_bottom, wt, wb, vx, unit_x, w); \
    1.21 + }                                                                             \
    1.22 +                                                                               \
    1.23 + FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op,                 \
    1.24 +                        scaled_bilinear_scanline_##cputype##_##name##_##op,    \
    1.25 +-                       src_type, uint32_t, dst_type, COVER, FLAG_NONE)        \
    1.26 ++                       NULL, src_type, uint32_t, dst_type, COVER, FLAG_NONE)  \
    1.27 + FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op,                  \
    1.28 +                        scaled_bilinear_scanline_##cputype##_##name##_##op,    \
    1.29 +-                       src_type, uint32_t, dst_type, NONE, FLAG_NONE)         \
    1.30 ++                       NULL, src_type, uint32_t, dst_type, NONE, FLAG_NONE)   \
    1.31 + FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op,                   \
    1.32 +                        scaled_bilinear_scanline_##cputype##_##name##_##op,    \
    1.33 +-                       src_type, uint32_t, dst_type, PAD, FLAG_NONE)          \
    1.34 ++                       NULL, src_type, uint32_t, dst_type, PAD, FLAG_NONE)    \
    1.35 + FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_normal_##op,                \
    1.36 +                        scaled_bilinear_scanline_##cputype##_##name##_##op,    \
    1.37 +-                       src_type, uint32_t, dst_type, NORMAL,                  \
    1.38 ++                       NULL, src_type, uint32_t, dst_type, NORMAL,            \
    1.39 +                        FLAG_NONE)
    1.40 + 
    1.41 + 
    1.42 + #define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, cputype, name, op,  \
    1.43 +                                                 src_type, dst_type)           \
    1.44 + void                                                                          \
    1.45 + pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype (               \
    1.46 +                                                 dst_type *       dst,         \
    1.47 +@@ -404,25 +404,25 @@ scaled_bilinear_scanline_##cputype##_##n
    1.48 +     if ((flags & SKIP_ZERO_SRC) && zero_src)                                  \
    1.49 + 	return;                                                                   \
    1.50 +     pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype (           \
    1.51 +                       dst, mask, src_top, src_bottom, wt, wb, vx, unit_x, w); \
    1.52 + }                                                                             \
    1.53 +                                                                               \
    1.54 + FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op,                 \
    1.55 +                        scaled_bilinear_scanline_##cputype##_##name##_##op,    \
    1.56 +-                       src_type, uint8_t, dst_type, COVER,                    \
    1.57 ++                       NULL, src_type, uint8_t, dst_type, COVER,              \
    1.58 +                        FLAG_HAVE_NON_SOLID_MASK)                              \
    1.59 + FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op,                  \
    1.60 +                        scaled_bilinear_scanline_##cputype##_##name##_##op,    \
    1.61 +-                       src_type, uint8_t, dst_type, NONE,                     \
    1.62 ++                       NULL, src_type, uint8_t, dst_type, NONE,               \
    1.63 +                        FLAG_HAVE_NON_SOLID_MASK)                              \
    1.64 + FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op,                   \
    1.65 +                        scaled_bilinear_scanline_##cputype##_##name##_##op,    \
    1.66 +-                       src_type, uint8_t, dst_type, PAD,                      \
    1.67 ++                       NULL, src_type, uint8_t, dst_type, PAD,                \
    1.68 +                        FLAG_HAVE_NON_SOLID_MASK)                              \
    1.69 + FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_normal_##op,                \
    1.70 +                        scaled_bilinear_scanline_##cputype##_##name##_##op,    \
    1.71 +-                       src_type, uint8_t, dst_type, NORMAL,                   \
    1.72 ++                       NULL, src_type, uint8_t, dst_type, NORMAL,             \
    1.73 +                        FLAG_HAVE_NON_SOLID_MASK)
    1.74 + 
    1.75 + 
    1.76 + #endif
    1.77 +diff --git a/gfx/cairo/libpixman/src/pixman-arm-neon.c b/gfx/cairo/libpixman/src/pixman-arm-neon.c
    1.78 +--- a/gfx/cairo/libpixman/src/pixman-arm-neon.c
    1.79 ++++ b/gfx/cairo/libpixman/src/pixman-arm-neon.c
    1.80 +@@ -140,16 +140,33 @@ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST 
    1.81 + PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
    1.82 +                                          uint32_t, uint16_t)
    1.83 + PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC,
    1.84 +                                          uint16_t, uint32_t)
    1.85 + PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC,
    1.86 +                                          uint16_t, uint16_t)
    1.87 + PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, OVER,
    1.88 +                                          uint32_t, uint32_t)
    1.89 ++static force_inline void
    1.90 ++pixman_scaled_bilinear_scanline_8888_8888_SRC (
    1.91 ++                                                uint32_t *       dst,
    1.92 ++                                                const uint32_t * mask,
    1.93 ++                                                const uint32_t * src_top,
    1.94 ++                                                const uint32_t * src_bottom,
    1.95 ++                                                int32_t          w,
    1.96 ++                                                int              wt,
    1.97 ++                                                int              wb,
    1.98 ++                                                pixman_fixed_t   vx,
    1.99 ++                                                pixman_fixed_t   unit_x,
   1.100 ++                                                pixman_fixed_t   max_vx,
   1.101 ++                                                pixman_bool_t    zero_src)
   1.102 ++{
   1.103 ++    pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (dst, src_top, src_bottom, wt, wb, vx, unit_x, w);
   1.104 ++}
   1.105 ++
   1.106 + PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, ADD,
   1.107 +                                          uint32_t, uint32_t)
   1.108 + 
   1.109 + PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_8888, SRC,
   1.110 +                                             uint32_t, uint32_t)
   1.111 + PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_0565, SRC,
   1.112 +                                             uint32_t, uint16_t)
   1.113 + PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_x888, SRC,
   1.114 +@@ -261,16 +278,38 @@ pixman_blt_neon (uint32_t *src_bits,
   1.115 + 		(uint32_t *)(((char *) src_bits) +
   1.116 + 		src_y * src_stride * 4 + src_x * 4), src_stride);
   1.117 + 	return TRUE;
   1.118 +     default:
   1.119 + 	return FALSE;
   1.120 +     }
   1.121 + }
   1.122 + 
   1.123 ++static inline void op_bilinear_over_8888_0565(uint16_t *dst, const uint32_t *mask, const uint32_t *src, int width)
   1.124 ++{
   1.125 ++    pixman_composite_over_8888_0565_asm_neon (width, 1, dst, 0, src, 0);
   1.126 ++}
   1.127 ++
   1.128 ++FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_cover_OVER,
   1.129 ++			       pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
   1.130 ++			       uint32_t, uint32_t, uint16_t,
   1.131 ++			       COVER, FLAG_NONE)
   1.132 ++FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_pad_OVER,
   1.133 ++			       pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
   1.134 ++			       uint32_t, uint32_t, uint16_t,
   1.135 ++			       PAD, FLAG_NONE)
   1.136 ++FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_none_OVER,
   1.137 ++			       pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
   1.138 ++			       uint32_t, uint32_t, uint16_t,
   1.139 ++			       NONE, FLAG_NONE)
   1.140 ++FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_normal_OVER,
   1.141 ++			       pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
   1.142 ++			       uint32_t, uint32_t, uint16_t,
   1.143 ++			       NORMAL, FLAG_NONE)
   1.144 ++
   1.145 + static const pixman_fast_path_t arm_neon_fast_paths[] =
   1.146 + {
   1.147 +     PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     r5g6b5,   neon_composite_src_0565_0565),
   1.148 +     PIXMAN_STD_FAST_PATH (SRC,  b5g6r5,   null,     b5g6r5,   neon_composite_src_0565_0565),
   1.149 +     PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     r5g6b5,   neon_composite_src_8888_0565),
   1.150 +     PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     r5g6b5,   neon_composite_src_8888_0565),
   1.151 +     PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     b5g6r5,   neon_composite_src_8888_0565),
   1.152 +     PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     b5g6r5,   neon_composite_src_8888_0565),
   1.153 +@@ -414,16 +453,18 @@ static const pixman_fast_path_t arm_neon
   1.154 +     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_8_0565),
   1.155 + 
   1.156 +     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
   1.157 +     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
   1.158 + 
   1.159 +     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
   1.160 +     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
   1.161 + 
   1.162 ++    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
   1.163 ++
   1.164 +     { PIXMAN_OP_NONE },
   1.165 + };
   1.166 + 
   1.167 + static pixman_bool_t
   1.168 + arm_neon_blt (pixman_implementation_t *imp,
   1.169 +               uint32_t *               src_bits,
   1.170 +               uint32_t *               dst_bits,
   1.171 +               int                      src_stride,
   1.172 +diff --git a/gfx/cairo/libpixman/src/pixman-fast-path.c b/gfx/cairo/libpixman/src/pixman-fast-path.c
   1.173 +--- a/gfx/cairo/libpixman/src/pixman-fast-path.c
   1.174 ++++ b/gfx/cairo/libpixman/src/pixman-fast-path.c
   1.175 +@@ -1356,63 +1356,63 @@ scaled_bilinear_scanline_565_565_SRC (ui
   1.176 +         vx += unit_x;
   1.177 +         *dst++ = d;
   1.178 +     }
   1.179 + }
   1.180 + 
   1.181 + #endif
   1.182 + 
   1.183 + FAST_BILINEAR_MAINLOOP_COMMON (565_565_cover_SRC,
   1.184 +-			       scaled_bilinear_scanline_565_565_SRC,
   1.185 ++			       scaled_bilinear_scanline_565_565_SRC, NULL,
   1.186 + 			       uint16_t, uint32_t, uint16_t,
   1.187 + 			       COVER, FLAG_NONE)
   1.188 + FAST_BILINEAR_MAINLOOP_COMMON (565_565_pad_SRC,
   1.189 +-			       scaled_bilinear_scanline_565_565_SRC,
   1.190 ++			       scaled_bilinear_scanline_565_565_SRC, NULL,
   1.191 + 			       uint16_t, uint32_t, uint16_t,
   1.192 + 			       PAD, FLAG_NONE)
   1.193 + FAST_BILINEAR_MAINLOOP_COMMON (565_565_none_SRC,
   1.194 +-			       scaled_bilinear_scanline_565_565_SRC,
   1.195 ++			       scaled_bilinear_scanline_565_565_SRC, NULL,
   1.196 + 			       uint16_t, uint32_t, uint16_t,
   1.197 + 			       NONE, FLAG_NONE)
   1.198 + FAST_BILINEAR_MAINLOOP_COMMON (565_565_normal_SRC,
   1.199 +-			       scaled_bilinear_scanline_565_565_SRC,
   1.200 ++			       scaled_bilinear_scanline_565_565_SRC, NULL,
   1.201 + 			       uint16_t, uint32_t, uint16_t,
   1.202 + 			       NORMAL, FLAG_NONE)
   1.203 + 
   1.204 + FAST_BILINEAR_MAINLOOP_COMMON (8888_565_cover_OVER,
   1.205 +-			       scaled_bilinear_scanline_8888_565_OVER,
   1.206 ++			       scaled_bilinear_scanline_8888_565_OVER, NULL,
   1.207 + 			       uint32_t, uint32_t, uint16_t,
   1.208 + 			       COVER, FLAG_NONE)
   1.209 + FAST_BILINEAR_MAINLOOP_COMMON (8888_565_pad_OVER,
   1.210 +-			       scaled_bilinear_scanline_8888_565_OVER,
   1.211 ++			       scaled_bilinear_scanline_8888_565_OVER, NULL,
   1.212 + 			       uint32_t, uint32_t, uint16_t,
   1.213 + 			       PAD, FLAG_NONE)
   1.214 + FAST_BILINEAR_MAINLOOP_COMMON (8888_565_none_OVER,
   1.215 +-			       scaled_bilinear_scanline_8888_565_OVER,
   1.216 ++			       scaled_bilinear_scanline_8888_565_OVER, NULL,
   1.217 + 			       uint32_t, uint32_t, uint16_t,
   1.218 + 			       NONE, FLAG_NONE)
   1.219 + FAST_BILINEAR_MAINLOOP_COMMON (8888_565_normal_OVER,
   1.220 +-			       scaled_bilinear_scanline_8888_565_OVER,
   1.221 ++			       scaled_bilinear_scanline_8888_565_OVER, NULL,
   1.222 + 			       uint32_t, uint32_t, uint16_t,
   1.223 + 			       NORMAL, FLAG_NONE)
   1.224 + 
   1.225 + FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_cover_OVER,
   1.226 +-			       scaled_bilinear_scanline_8888_8888_OVER,
   1.227 ++			       scaled_bilinear_scanline_8888_8888_OVER, NULL,
   1.228 + 			       uint32_t, uint32_t, uint32_t,
   1.229 + 			       COVER, FLAG_NONE)
   1.230 + FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_pad_OVER,
   1.231 +-			       scaled_bilinear_scanline_8888_8888_OVER,
   1.232 ++			       scaled_bilinear_scanline_8888_8888_OVER, NULL,
   1.233 + 			       uint32_t, uint32_t, uint32_t,
   1.234 + 			       PAD, FLAG_NONE)
   1.235 + FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_none_OVER,
   1.236 +-			       scaled_bilinear_scanline_8888_8888_OVER,
   1.237 ++			       scaled_bilinear_scanline_8888_8888_OVER, NULL,
   1.238 + 			       uint32_t, uint32_t, uint32_t,
   1.239 + 			       NONE, FLAG_NONE)
   1.240 + FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_normal_OVER,
   1.241 +-			       scaled_bilinear_scanline_8888_8888_OVER,
   1.242 ++			       scaled_bilinear_scanline_8888_8888_OVER, NULL,
   1.243 + 			       uint32_t, uint32_t, uint32_t,
   1.244 + 			       NORMAL, FLAG_NONE)
   1.245 + 
   1.246 + #define REPEAT_MIN_WIDTH    32
   1.247 + 
   1.248 + static void
   1.249 + fast_composite_tiled_repeat (pixman_implementation_t *imp,
   1.250 + 			     pixman_composite_info_t *info)
   1.251 +diff --git a/gfx/cairo/libpixman/src/pixman-inlines.h b/gfx/cairo/libpixman/src/pixman-inlines.h
   1.252 +--- a/gfx/cairo/libpixman/src/pixman-inlines.h
   1.253 ++++ b/gfx/cairo/libpixman/src/pixman-inlines.h
   1.254 +@@ -816,18 +816,48 @@ bilinear_pad_repeat_get_scanline_bounds 
   1.255 +  *
   1.256 +  * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to 256,
   1.257 +  *       but sometimes it may be less than that for NONE repeat when handling
   1.258 +  *       fuzzy antialiased top or bottom image edges. Also both top and
   1.259 +  *       bottom weight variables are guaranteed to have value in 0-255
   1.260 +  *       range and can fit into unsigned byte or be used with 8-bit SIMD
   1.261 +  *       multiplication instructions.
   1.262 +  */
   1.263 +-#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
   1.264 +-				  dst_type_t, repeat_mode, flags)				\
   1.265 ++
   1.266 +/* Replace the single "scanline_func" with "fetch_func" and "op_func" to allow
   1.267 + * optional two-stage processing (a bilinear fetch into a temporary buffer followed
   1.268 + * by an unscaled combine). "op_func" may be NULL, in which case the old behavior is kept.
   1.269 + * This is ugly and gcc issues some warnings, but it works.
   1.270 + *
   1.271 + * Tip: clang has much better error reporting than gcc for deeply nested macros.
   1.272 + */
   1.273 ++
   1.274 ++#define	scanline_func(dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,            \
   1.275 ++                      scanline_buf, mask, src_top, src_bottom, width,                           \
   1.276 ++                      weight_top, weight_bottom, vx, unit_x, max_vx, zero_src)                  \
   1.277 ++ do {                                                                                           \
   1.278 ++		if (op_func != NULL)								\
   1.279 ++		{										\
   1.280 ++		    fetch_func ((void *)scanline_buf, (mask), (src_top), (src_bottom), (width), \
   1.281 ++                        (weight_top), (weight_bottom), (vx), (unit_x), (max_vx), (zero_src));   \
   1.282 ++		    ((void (*)(dst_type_t *, const mask_type_t *, const src_type_t *, int)) op_func)\
   1.283 ++			((dst), (mask), (src_type_t *)scanline_buf, (width));			\
   1.284 ++		}										\
   1.285 ++		else										\
   1.286 ++		{										\
   1.287 ++		    fetch_func ((void*)(dst), (mask), (src_top), (src_bottom), (width), (weight_top),  \
   1.288 ++                                (weight_bottom), (vx), (unit_x), (max_vx), (zero_src));         \
   1.289 ++		}                                                                               \
   1.290 ++  } while (0)
   1.291 ++
   1.292 ++
   1.293 ++#define SCANLINE_BUFFER_LENGTH 3072
   1.294 ++
   1.295 ++#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, fetch_func, op_func, src_type_t,		\
   1.296 ++				  mask_type_t, dst_type_t, repeat_mode, flags)			\
   1.297 + static void											\
   1.298 + fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,		\
   1.299 + 						   pixman_composite_info_t *info)		\
   1.300 + {												\
   1.301 +     PIXMAN_COMPOSITE_ARGS (info);								\
   1.302 +     dst_type_t *dst_line;									\
   1.303 +     mask_type_t *mask_line;									\
   1.304 +     src_type_t *src_first_line;									\
   1.305 +@@ -842,16 +872,19 @@ fast_composite_scaled_bilinear ## scale_
   1.306 +     mask_type_t solid_mask;									\
   1.307 +     const mask_type_t *mask = &solid_mask;							\
   1.308 +     int src_stride, mask_stride, dst_stride;							\
   1.309 + 												\
   1.310 +     int src_width;										\
   1.311 +     pixman_fixed_t src_width_fixed;								\
   1.312 +     int max_x;											\
   1.313 +     pixman_bool_t need_src_extension;								\
   1.314 ++                                                                                                \
   1.315 ++    uint64_t stack_scanline_buffer[SCANLINE_BUFFER_LENGTH];                                     \
   1.316 ++    uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer;                               \
   1.317 + 												\
   1.318 +     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);	\
   1.319 +     if (flags & FLAG_HAVE_SOLID_MASK)								\
   1.320 +     {												\
   1.321 + 	solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);	\
   1.322 + 	mask_stride = 0;									\
   1.323 +     }												\
   1.324 +     else if (flags & FLAG_HAVE_NON_SOLID_MASK)							\
   1.325 +@@ -914,16 +947,24 @@ fast_composite_scaled_bilinear ## scale_
   1.326 + 	else											\
   1.327 + 	{											\
   1.328 + 	    src_width = src_image->bits.width;							\
   1.329 + 	    need_src_extension = FALSE;								\
   1.330 + 	}											\
   1.331 + 												\
   1.332 + 	src_width_fixed = pixman_int_to_fixed (src_width);					\
   1.333 +     }												\
   1.334 ++                                                                                                \
   1.335 ++    if (op_func != NULL && width * sizeof(src_type_t) > sizeof(stack_scanline_buffer))          \
   1.336 ++    {                                                                                           \
   1.337 ++	scanline_buffer = pixman_malloc_ab (width, sizeof(src_type_t));                         \
   1.338 ++                                                                                                \
   1.339 ++	if (!scanline_buffer)                                                                   \
   1.340 ++	    return;                                                                             \
   1.341 ++    }                                                                                           \
   1.342 + 												\
   1.343 +     while (--height >= 0)									\
   1.344 +     {												\
   1.345 + 	int weight1, weight2;									\
   1.346 + 	dst = dst_line;										\
   1.347 + 	dst_line += dst_stride;									\
   1.348 + 	vx = v.vector[0];									\
   1.349 + 	if (flags & FLAG_HAVE_NON_SOLID_MASK)							\
   1.350 +@@ -956,36 +997,39 @@ fast_composite_scaled_bilinear ## scale_
   1.351 + 	    repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);				\
   1.352 + 	    src1 = src_first_line + src_stride * y1;						\
   1.353 + 	    src2 = src_first_line + src_stride * y2;						\
   1.354 + 												\
   1.355 + 	    if (left_pad > 0)									\
   1.356 + 	    {											\
   1.357 + 		buf1[0] = buf1[1] = src1[0];							\
   1.358 + 		buf2[0] = buf2[1] = src2[0];							\
   1.359 +-		scanline_func (dst, mask,							\
   1.360 +-			       buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE);		\
   1.361 ++		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
   1.362 ++			       scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2,   \
   1.363 ++                               0, 0, 0, FALSE);	                                                \
   1.364 + 		dst += left_pad;								\
   1.365 + 		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
   1.366 + 		    mask += left_pad;								\
   1.367 + 	    }											\
   1.368 + 	    if (width > 0)									\
   1.369 + 	    {											\
   1.370 +-		scanline_func (dst, mask,							\
   1.371 +-			       src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);	\
   1.372 ++		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
   1.373 ++			       scanline_buffer, mask, src1, src2, width, weight1, weight2,      \
   1.374 ++                               vx, unit_x, 0, FALSE);                                           \
   1.375 + 		dst += width;									\
   1.376 + 		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
   1.377 + 		    mask += width;								\
   1.378 + 	    }											\
   1.379 + 	    if (right_pad > 0)									\
   1.380 + 	    {											\
   1.381 + 		buf1[0] = buf1[1] = src1[src_image->bits.width - 1];				\
   1.382 + 		buf2[0] = buf2[1] = src2[src_image->bits.width - 1];				\
   1.383 +-		scanline_func (dst, mask,							\
   1.384 +-			       buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE);	\
   1.385 ++		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
   1.386 ++			       scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2,  \
   1.387 ++                               0, 0, 0, FALSE);                                                 \
   1.388 + 	    }											\
   1.389 + 	}											\
   1.390 + 	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)				\
   1.391 + 	{											\
   1.392 + 	    src_type_t *src1, *src2;								\
   1.393 + 	    src_type_t buf1[2];									\
   1.394 + 	    src_type_t buf2[2];									\
   1.395 + 	    /* handle top/bottom zero padding by just setting weights to 0 if needed */		\
   1.396 +@@ -1011,64 +1055,67 @@ fast_composite_scaled_bilinear ## scale_
   1.397 + 	    }											\
   1.398 + 	    src1 = src_first_line + src_stride * y1;						\
   1.399 + 	    src2 = src_first_line + src_stride * y2;						\
   1.400 + 												\
   1.401 + 	    if (left_pad > 0)									\
   1.402 + 	    {											\
   1.403 + 		buf1[0] = buf1[1] = 0;								\
   1.404 + 		buf2[0] = buf2[1] = 0;								\
   1.405 +-		scanline_func (dst, mask,							\
   1.406 +-			       buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE);		\
   1.407 ++		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
   1.408 ++			       scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2,   \
   1.409 ++                               0, 0, 0, TRUE);	                                                \
   1.410 + 		dst += left_pad;								\
   1.411 + 		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
   1.412 + 		    mask += left_pad;								\
   1.413 + 	    }											\
   1.414 + 	    if (left_tz > 0)									\
   1.415 + 	    {											\
   1.416 + 		buf1[0] = 0;									\
   1.417 + 		buf1[1] = src1[0];								\
   1.418 + 		buf2[0] = 0;									\
   1.419 + 		buf2[1] = src2[0];								\
   1.420 +-		scanline_func (dst, mask,							\
   1.421 +-			       buf1, buf2, left_tz, weight1, weight2,				\
   1.422 ++		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
   1.423 ++			       scanline_buffer, mask, buf1, buf2, left_tz, weight1, weight2,	\
   1.424 + 			       pixman_fixed_frac (vx), unit_x, 0, FALSE);			\
   1.425 + 		dst += left_tz;									\
   1.426 + 		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
   1.427 + 		    mask += left_tz;								\
   1.428 + 		vx += left_tz * unit_x;								\
   1.429 + 	    }											\
   1.430 + 	    if (width > 0)									\
   1.431 + 	    {											\
   1.432 +-		scanline_func (dst, mask,							\
   1.433 +-			       src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);	\
   1.434 ++		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
   1.435 ++			       scanline_buffer, mask, src1, src2, width, weight1, weight2,      \
   1.436 ++                               vx, unit_x, 0, FALSE);                                           \
   1.437 + 		dst += width;									\
   1.438 + 		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
   1.439 + 		    mask += width;								\
   1.440 + 		vx += width * unit_x;								\
   1.441 + 	    }											\
   1.442 + 	    if (right_tz > 0)									\
   1.443 + 	    {											\
   1.444 + 		buf1[0] = src1[src_image->bits.width - 1];					\
   1.445 + 		buf1[1] = 0;									\
   1.446 + 		buf2[0] = src2[src_image->bits.width - 1];					\
   1.447 + 		buf2[1] = 0;									\
   1.448 +-		scanline_func (dst, mask,							\
   1.449 +-			       buf1, buf2, right_tz, weight1, weight2,				\
   1.450 ++		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
   1.451 ++			       scanline_buffer, mask, buf1, buf2, right_tz, weight1, weight2,   \
   1.452 + 			       pixman_fixed_frac (vx), unit_x, 0, FALSE);			\
   1.453 + 		dst += right_tz;								\
   1.454 + 		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
   1.455 + 		    mask += right_tz;								\
   1.456 + 	    }											\
   1.457 + 	    if (right_pad > 0)									\
   1.458 + 	    {											\
   1.459 + 		buf1[0] = buf1[1] = 0;								\
   1.460 + 		buf2[0] = buf2[1] = 0;								\
   1.461 +-		scanline_func (dst, mask,							\
   1.462 +-			       buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE);		\
   1.463 ++		scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,	\
   1.464 ++			       scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2,  \
   1.465 ++                               0, 0, 0, TRUE);	                                                \
   1.466 + 	    }											\
   1.467 + 	}											\
   1.468 + 	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
   1.469 + 	{											\
   1.470 + 	    int32_t	    num_pixels;								\
   1.471 + 	    int32_t	    width_remain;							\
   1.472 + 	    src_type_t *    src_line_top;							\
   1.473 + 	    src_type_t *    src_line_bottom;							\
   1.474 +@@ -1120,17 +1167,18 @@ fast_composite_scaled_bilinear ## scale_
   1.475 + 		     * vx is in range [0, src_width_fixed - pixman_fixed_e]			\
   1.476 + 		     * So we are safe from overflow.						\
   1.477 + 		     */										\
   1.478 + 		    num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1;	\
   1.479 + 												\
   1.480 + 		    if (num_pixels > width_remain)						\
   1.481 + 			num_pixels = width_remain;						\
   1.482 + 												\
   1.483 +-		    scanline_func (dst, mask, buf1, buf2, num_pixels,				\
   1.484 ++		    scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func,    \
   1.485 ++                                   dst, scanline_buffer, mask, buf1, buf2, num_pixels,          \
   1.486 + 				   weight1, weight2, pixman_fixed_frac(vx),			\
   1.487 + 				   unit_x, src_width_fixed, FALSE);				\
   1.488 + 												\
   1.489 + 		    width_remain -= num_pixels;							\
   1.490 + 		    vx += num_pixels * unit_x;							\
   1.491 + 		    dst += num_pixels;								\
   1.492 + 												\
   1.493 + 		    if (flags & FLAG_HAVE_NON_SOLID_MASK)					\
   1.494 +@@ -1149,41 +1197,47 @@ fast_composite_scaled_bilinear ## scale_
   1.495 + 		     * So we are safe from overflow here.					\
   1.496 + 		     */										\
   1.497 + 		    num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e)	\
   1.498 + 				  / unit_x) + 1;						\
   1.499 + 												\
   1.500 + 		    if (num_pixels > width_remain)						\
   1.501 + 			num_pixels = width_remain;						\
   1.502 + 												\
   1.503 +-		    scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels,	\
   1.504 +-				   weight1, weight2, vx, unit_x, src_width_fixed, FALSE);	\
   1.505 ++		    scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func,    \
   1.506 ++                                   dst, scanline_buffer, mask, src_line_top, src_line_bottom,   \
   1.507 ++                                   num_pixels, weight1, weight2, vx, unit_x, src_width_fixed,   \
   1.508 ++                                   FALSE);	                                                \
   1.509 + 												\
   1.510 + 		    width_remain -= num_pixels;							\
   1.511 + 		    vx += num_pixels * unit_x;							\
   1.512 + 		    dst += num_pixels;								\
   1.513 + 												\
   1.514 + 		    if (flags & FLAG_HAVE_NON_SOLID_MASK)					\
   1.515 + 		        mask += num_pixels;							\
   1.516 + 		}										\
   1.517 + 	    }											\
   1.518 + 	}											\
   1.519 + 	else											\
   1.520 + 	{											\
   1.521 +-	    scanline_func (dst, mask, src_first_line + src_stride * y1,				\
   1.522 ++	    scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,       \
   1.523 ++                           scanline_buffer, mask,                                               \
   1.524 ++                           src_first_line + src_stride * y1,					\
   1.525 + 			   src_first_line + src_stride * y2, width,				\
   1.526 + 			   weight1, weight2, vx, unit_x, max_vx, FALSE);			\
   1.527 + 	}											\
   1.528 +     }												\
   1.529 ++    if (scanline_buffer != (uint8_t *) stack_scanline_buffer)                                   \
   1.530 ++	free (scanline_buffer);                                                                 \
   1.531 + }
   1.532 + 
   1.533 + /* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
   1.534 +-#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
   1.535 ++#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, fetch_func, op_func, src_type_t, mask_type_t,\
   1.536 + 				  dst_type_t, repeat_mode, flags)				\
   1.537 +-	FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
   1.538 ++	FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, fetch_func, op_func, src_type_t, mask_type_t,\
   1.539 + 				  dst_type_t, repeat_mode, flags)
   1.540 + 
   1.541 + #define SCALED_BILINEAR_FLAGS						\
   1.542 +     (FAST_PATH_SCALE_TRANSFORM	|					\
   1.543 +      FAST_PATH_NO_ALPHA_MAP	|					\
   1.544 +      FAST_PATH_BILINEAR_FILTER	|					\
   1.545 +      FAST_PATH_NO_ACCESSORS	|					\
   1.546 +      FAST_PATH_NARROW_FORMAT)
   1.547 +diff --git a/gfx/cairo/libpixman/src/pixman-sse2.c b/gfx/cairo/libpixman/src/pixman-sse2.c
   1.548 +--- a/gfx/cairo/libpixman/src/pixman-sse2.c
   1.549 ++++ b/gfx/cairo/libpixman/src/pixman-sse2.c
   1.550 +@@ -5404,30 +5404,33 @@ scaled_bilinear_scanline_sse2_8888_8888_
   1.551 +     if (w & 1)
   1.552 +     {
   1.553 + 	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
   1.554 + 	*dst = pix1;
   1.555 +     }
   1.556 + 
   1.557 + }
   1.558 + 
   1.559 ++/* Add extra NULL argument to the existing bilinear fast paths to indicate
   1.560 ++ * that we don't need two-pass processing */
   1.561 ++
   1.562 + FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC,
   1.563 +-			       scaled_bilinear_scanline_sse2_8888_8888_SRC,
   1.564 ++			       scaled_bilinear_scanline_sse2_8888_8888_SRC, NULL,
   1.565 + 			       uint32_t, uint32_t, uint32_t,
   1.566 + 			       COVER, FLAG_NONE)
   1.567 + FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC,
   1.568 +-			       scaled_bilinear_scanline_sse2_8888_8888_SRC,
   1.569 ++			       scaled_bilinear_scanline_sse2_8888_8888_SRC, NULL,
   1.570 + 			       uint32_t, uint32_t, uint32_t,
   1.571 + 			       PAD, FLAG_NONE)
   1.572 + FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC,
   1.573 +-			       scaled_bilinear_scanline_sse2_8888_8888_SRC,
   1.574 ++			       scaled_bilinear_scanline_sse2_8888_8888_SRC, NULL,
   1.575 + 			       uint32_t, uint32_t, uint32_t,
   1.576 + 			       NONE, FLAG_NONE)
   1.577 + FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC,
   1.578 +-			       scaled_bilinear_scanline_sse2_8888_8888_SRC,
   1.579 ++			       scaled_bilinear_scanline_sse2_8888_8888_SRC, NULL,
   1.580 + 			       uint32_t, uint32_t, uint32_t,
   1.581 + 			       NORMAL, FLAG_NONE)
   1.582 + 
   1.583 + static force_inline void
   1.584 + scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t *       dst,
   1.585 + 					      const uint32_t * mask,
   1.586 + 					      const uint32_t * src_top,
   1.587 + 					      const uint32_t * src_bottom,
   1.588 +@@ -5505,32 +5508,66 @@ scaled_bilinear_scanline_sse2_8888_8888_
   1.589 + 	}
   1.590 + 
   1.591 + 	w--;
   1.592 + 	dst++;
   1.593 +     }
   1.594 + }
   1.595 + 
   1.596 + FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_OVER,
   1.597 +-			       scaled_bilinear_scanline_sse2_8888_8888_OVER,
   1.598 ++			       scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL,
   1.599 + 			       uint32_t, uint32_t, uint32_t,
   1.600 + 			       COVER, FLAG_NONE)
   1.601 + FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER,
   1.602 +-			       scaled_bilinear_scanline_sse2_8888_8888_OVER,
   1.603 ++			       scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL,
   1.604 + 			       uint32_t, uint32_t, uint32_t,
   1.605 + 			       PAD, FLAG_NONE)
   1.606 + FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_OVER,
   1.607 +-			       scaled_bilinear_scanline_sse2_8888_8888_OVER,
   1.608 ++			       scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL,
   1.609 + 			       uint32_t, uint32_t, uint32_t,
   1.610 + 			       NONE, FLAG_NONE)
   1.611 + FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_OVER,
   1.612 +-			       scaled_bilinear_scanline_sse2_8888_8888_OVER,
   1.613 ++			       scaled_bilinear_scanline_sse2_8888_8888_OVER, NULL,
   1.614 + 			       uint32_t, uint32_t, uint32_t,
   1.615 + 			       NORMAL, FLAG_NONE)
   1.616 + 
   1.617 ++
   1.618 ++/* An example of SSE2 two-stage bilinear_over_8888_0565 fast path, which is implemented
   1.619 ++   as scaled_bilinear_scanline_sse2_8888_8888_SRC + op_bilinear_over_8888_0565 */
   1.620 ++
   1.621 ++void op_bilinear_over_8888_0565(uint16_t *dst, const uint32_t *mask, const uint32_t *src, int width)
   1.622 ++{
   1.623 +    /* Note: this scalar loop is not really fast; it should be based on the 8-pixel loop from sse2_composite_over_8888_0565 */
   1.624 ++    while (--width >= 0)
   1.625 ++    {
   1.626 ++	*dst = composite_over_8888_0565pixel (*src, *dst);
   1.627 ++	src++;
   1.628 ++	dst++;
   1.629 ++    }
   1.630 ++}
   1.631 ++
   1.632 ++FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_0565_cover_OVER,
   1.633 ++			       scaled_bilinear_scanline_sse2_8888_8888_SRC, op_bilinear_over_8888_0565,
   1.634 ++			       uint32_t, uint32_t, uint16_t,
   1.635 ++			       COVER, FLAG_NONE)
   1.636 ++FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_0565_pad_OVER,
   1.637 ++			       scaled_bilinear_scanline_sse2_8888_8888_SRC, op_bilinear_over_8888_0565,
   1.638 ++			       uint32_t, uint32_t, uint16_t,
   1.639 ++			       PAD, FLAG_NONE)
   1.640 ++FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_0565_none_OVER,
   1.641 ++			       scaled_bilinear_scanline_sse2_8888_8888_SRC, op_bilinear_over_8888_0565,
   1.642 ++			       uint32_t, uint32_t, uint16_t,
   1.643 ++			       NONE, FLAG_NONE)
   1.644 ++FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_0565_normal_OVER,
   1.645 ++			       scaled_bilinear_scanline_sse2_8888_8888_SRC, op_bilinear_over_8888_0565,
   1.646 ++			       uint32_t, uint32_t, uint16_t,
   1.647 ++			       NORMAL, FLAG_NONE)
   1.648 ++
   1.649 ++/*****************************/
   1.650 ++
   1.651 + static force_inline void
   1.652 + scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t *       dst,
   1.653 + 						const uint8_t  * mask,
   1.654 + 						const uint32_t * src_top,
   1.655 + 						const uint32_t * src_bottom,
   1.656 + 						int32_t          w,
   1.657 + 						int              wt,
   1.658 + 						int              wb,
   1.659 +@@ -5669,29 +5706,29 @@ scaled_bilinear_scanline_sse2_8888_8_888
   1.660 + 	}
   1.661 + 
   1.662 + 	w--;
   1.663 + 	dst++;
   1.664 +     }
   1.665 + }
   1.666 + 
   1.667 + FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_cover_OVER,
   1.668 +-			       scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
   1.669 ++			       scaled_bilinear_scanline_sse2_8888_8_8888_OVER, NULL,
   1.670 + 			       uint32_t, uint8_t, uint32_t,
   1.671 + 			       COVER, FLAG_HAVE_NON_SOLID_MASK)
   1.672 + FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_pad_OVER,
   1.673 +-			       scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
   1.674 ++			       scaled_bilinear_scanline_sse2_8888_8_8888_OVER, NULL,
   1.675 + 			       uint32_t, uint8_t, uint32_t,
   1.676 + 			       PAD, FLAG_HAVE_NON_SOLID_MASK)
   1.677 + FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_none_OVER,
   1.678 +-			       scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
   1.679 ++			       scaled_bilinear_scanline_sse2_8888_8_8888_OVER, NULL,
   1.680 + 			       uint32_t, uint8_t, uint32_t,
   1.681 + 			       NONE, FLAG_HAVE_NON_SOLID_MASK)
   1.682 + FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER,
   1.683 +-			       scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
   1.684 ++			       scaled_bilinear_scanline_sse2_8888_8_8888_OVER, NULL,
   1.685 + 			       uint32_t, uint8_t, uint32_t,
   1.686 + 			       NORMAL, FLAG_HAVE_NON_SOLID_MASK)
   1.687 + 
   1.688 + static const pixman_fast_path_t sse2_fast_paths[] =
   1.689 + {
   1.690 +     /* PIXMAN_OP_OVER */
   1.691 +     PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, sse2_composite_over_n_8_0565),
   1.692 +     PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, sse2_composite_over_n_8_0565),
   1.693 +@@ -5808,16 +5845,21 @@ static const pixman_fast_path_t sse2_fas
   1.694 +     SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
   1.695 +     SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
   1.696 + 
   1.697 +     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888),
   1.698 +     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888),
   1.699 +     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888),
   1.700 +     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8_8888),
   1.701 + 
   1.702 +    /* Entries that add the two-stage bilinear 8888-over-0565 paths to the fast path table */
   1.703 ++
   1.704 ++    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, sse2_8888_0565),
   1.705 ++    SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, b5g6r5, sse2_8888_0565),
   1.706 ++
   1.707 +     { PIXMAN_OP_NONE },
   1.708 + };
   1.709 + 
   1.710 + static pixman_bool_t
   1.711 + sse2_blt (pixman_implementation_t *imp,
   1.712 +           uint32_t *               src_bits,
   1.713 +           uint32_t *               dst_bits,
   1.714 +           int                      src_stride,
   1.715 +

mercurial