Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2 /*
3 * Copyright © 2000 SuSE, Inc.
4 * Copyright © 2007 Red Hat, Inc.
5 *
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of SuSE not be used in advertising or
11 * publicity pertaining to distribution of the software without specific,
12 * written prior permission. SuSE makes no representations about the
13 * suitability of this software for any purpose. It is provided "as is"
14 * without express or implied warranty.
15 *
16 * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22 *
23 * Author: Keith Packard, SuSE, Inc.
24 */
26 #ifndef PIXMAN_FAST_PATH_H__
27 #define PIXMAN_FAST_PATH_H__
29 #include "pixman-private.h"
/* Pseudo repeat mode for the 'cover' templates, i.e. the variants without any
 * repeat handling.  The templates in this file compare
 * PIXMAN_REPEAT_ ## repeat_mode against NORMAL, PAD and NONE only, so this
 * value selects their fall-through branch.  The replacement list is
 * parenthesised so that the negative literal always expands as one operand.
 */
#define PIXMAN_REPEAT_COVER (-1)
33 /* Flags describing input parameters to fast path macro template.
34 * Turning on some flag values may indicate that
35 * "some property X is available so template can use this" or
36 * "some property X should be handled by template".
37 *
38 * FLAG_HAVE_SOLID_MASK
39 * Input mask is solid so template should handle this.
40 *
41 * FLAG_HAVE_NON_SOLID_MASK
42 * Input mask is bits mask so template should handle this.
43 *
44 * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually
45 * exclusive. (It's not allowed to turn both flags on)
46 */
/* Bit values for the 'flags' argument of the fast path macro templates */
#define FLAG_NONE                 (0x0)   /* no flag set */
#define FLAG_HAVE_SOLID_MASK      (0x2)   /* bit 1 */
#define FLAG_HAVE_NON_SOLID_MASK  (0x4)   /* bit 2 */
51 /* Minimum source scanline width, in pixels, for NORMAL repeat.
 * Source images narrower than this are extended (see the
 * 'need_src_extension' handling in the bilinear main loop) so that the
 * scanline function is not called repeatedly for very short runs.
 */
54 #define REPEAT_NORMAL_MIN_WIDTH 64
56 static force_inline pixman_bool_t
57 repeat (pixman_repeat_t repeat, int *c, int size)
58 {
59 if (repeat == PIXMAN_REPEAT_NONE)
60 {
61 if (*c < 0 || *c >= size)
62 return FALSE;
63 }
64 else if (repeat == PIXMAN_REPEAT_NORMAL)
65 {
66 while (*c >= size)
67 *c -= size;
68 while (*c < 0)
69 *c += size;
70 }
71 else if (repeat == PIXMAN_REPEAT_PAD)
72 {
73 *c = CLIP (*c, 0, size - 1);
74 }
75 else /* REFLECT */
76 {
77 *c = MOD (*c, size * 2);
78 if (*c >= size)
79 *c = size * 2 - *c - 1;
80 }
81 return TRUE;
82 }
84 static force_inline int
85 pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
86 {
87 return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
88 ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
89 }
91 #if BILINEAR_INTERPOLATION_BITS <= 4
92 /* Inspired by Filter_32_opaque from Skia */
93 static force_inline uint32_t
94 bilinear_interpolation (uint32_t tl, uint32_t tr,
95 uint32_t bl, uint32_t br,
96 int distx, int disty)
97 {
98 int distxy, distxiy, distixy, distixiy;
99 uint32_t lo, hi;
101 distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
102 disty <<= (4 - BILINEAR_INTERPOLATION_BITS);
104 distxy = distx * disty;
105 distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */
106 distixy = (disty << 4) - distxy; /* disty * (16 - distx) */
107 distixiy =
108 16 * 16 - (disty << 4) -
109 (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */
111 lo = (tl & 0xff00ff) * distixiy;
112 hi = ((tl >> 8) & 0xff00ff) * distixiy;
114 lo += (tr & 0xff00ff) * distxiy;
115 hi += ((tr >> 8) & 0xff00ff) * distxiy;
117 lo += (bl & 0xff00ff) * distixy;
118 hi += ((bl >> 8) & 0xff00ff) * distixy;
120 lo += (br & 0xff00ff) * distxy;
121 hi += ((br >> 8) & 0xff00ff) * distxy;
123 return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff);
124 }
126 #else
127 #if SIZEOF_LONG > 4
129 static force_inline uint32_t
130 bilinear_interpolation (uint32_t tl, uint32_t tr,
131 uint32_t bl, uint32_t br,
132 int distx, int disty)
133 {
134 uint64_t distxy, distxiy, distixy, distixiy;
135 uint64_t tl64, tr64, bl64, br64;
136 uint64_t f, r;
138 distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
139 disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
141 distxy = distx * disty;
142 distxiy = distx * (256 - disty);
143 distixy = (256 - distx) * disty;
144 distixiy = (256 - distx) * (256 - disty);
146 /* Alpha and Blue */
147 tl64 = tl & 0xff0000ff;
148 tr64 = tr & 0xff0000ff;
149 bl64 = bl & 0xff0000ff;
150 br64 = br & 0xff0000ff;
152 f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
153 r = f & 0x0000ff0000ff0000ull;
155 /* Red and Green */
156 tl64 = tl;
157 tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
159 tr64 = tr;
160 tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
162 bl64 = bl;
163 bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
165 br64 = br;
166 br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
168 f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
169 r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
171 return (uint32_t)(r >> 16);
172 }
174 #else
176 #ifdef LOW_QUALITY_INTERPOLATION
177 /* Based on Filter_32_opaque_portable from Skia */
178 static force_inline uint32_t
179 bilinear_interpolation(uint32_t a00, uint32_t a01,
180 uint32_t a10, uint32_t a11,
181 int x, int y)
182 {
183 int xy = x * y;
184 static const uint32_t mask = 0xff00ff;
186 int scale = 256 - 16*y - 16*x + xy;
187 uint32_t lo = (a00 & mask) * scale;
188 uint32_t hi = ((a00 >> 8) & mask) * scale;
190 scale = 16*x - xy;
191 lo += (a01 & mask) * scale;
192 hi += ((a01 >> 8) & mask) * scale;
194 scale = 16*y - xy;
195 lo += (a10 & mask) * scale;
196 hi += ((a10 >> 8) & mask) * scale;
198 lo += (a11 & mask) * xy;
199 hi += ((a11 >> 8) & mask) * xy;
201 return ((lo >> 8) & mask) | (hi & ~mask);
202 }
203 #else
204 static force_inline uint32_t
205 bilinear_interpolation (uint32_t tl, uint32_t tr,
206 uint32_t bl, uint32_t br,
207 int distx, int disty)
208 {
209 int distxy, distxiy, distixy, distixiy;
210 uint32_t f, r;
212 distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
213 disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
215 distxy = distx * disty;
216 distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */
217 distixy = (disty << 8) - distxy; /* disty * (256 - distx) */
218 distixiy =
219 256 * 256 - (disty << 8) -
220 (distx << 8) + distxy; /* (256 - distx) * (256 - disty) */
222 /* Blue */
223 r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
224 + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
226 /* Green */
227 f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
228 + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
229 r |= f & 0xff000000;
231 tl >>= 16;
232 tr >>= 16;
233 bl >>= 16;
234 br >>= 16;
235 r >>= 16;
237 /* Red */
238 f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
239 + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
240 r |= f & 0x00ff0000;
242 /* Alpha */
243 f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
244 + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
245 r |= f & 0xff000000;
247 return r;
248 }
249 #endif
250 #endif
251 #endif // BILINEAR_INTERPOLATION_BITS <= 4
253 /*
254 * For each scanline fetched from source image with PAD repeat:
255 * - calculate how many pixels need to be padded on the left side
256 * - calculate how many pixels need to be padded on the right side
257 * - update width to only count pixels which are fetched from the image
258 * All this information is returned via 'width', 'left_pad', 'right_pad'
259 * arguments. The code is assuming that 'unit_x' is positive.
260 *
261 * Note: 64-bit math is used in order to avoid potential overflows, which
262 * is probably excessive in many cases. This particular function
263 * may need its own correctness test and performance tuning.
264 */
265 static force_inline void
266 pad_repeat_get_scanline_bounds (int32_t source_image_width,
267 pixman_fixed_t vx,
268 pixman_fixed_t unit_x,
269 int32_t * width,
270 int32_t * left_pad,
271 int32_t * right_pad)
272 {
273 int64_t max_vx = (int64_t) source_image_width << 16;
274 int64_t tmp;
275 if (vx < 0)
276 {
277 tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
278 if (tmp > *width)
279 {
280 *left_pad = *width;
281 *width = 0;
282 }
283 else
284 {
285 *left_pad = (int32_t) tmp;
286 *width -= (int32_t) tmp;
287 }
288 }
289 else
290 {
291 *left_pad = 0;
292 }
293 tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
294 if (tmp < 0)
295 {
296 *right_pad = *width;
297 *width = 0;
298 }
299 else if (tmp >= *width)
300 {
301 *right_pad = 0;
302 }
303 else
304 {
305 *right_pad = *width - (int32_t) tmp;
306 *width = (int32_t) tmp;
307 }
308 }
310 /* A macroified version of specialized nearest scalers for some
311 * common 8888 and 565 formats. It supports SRC and OVER ops.
312 *
313 * There are two repeat versions, one that handles repeat normal,
314 * and one without repeat handling that only works if the src region
315 * used is completely covered by the pre-repeated source samples.
316 *
317 * The loops are unrolled to process two pixels per iteration for better
318 * performance on most CPU architectures (superscalar processors
319 * can issue several operations simultaneously, other processors can hide
320 * instruction latencies by pipelining operations). Unrolling more
321 * does not make much sense because the compiler will start running out
322 * of spare registers soon.
323 */
/* Alpha accessors, selected by source format in FAST_NEAREST_SCANLINE. */

/* 8888: alpha lives in the top byte */
#define GET_8888_ALPHA(s) ((s) >> 24)

/* x888: treated as fully opaque */
#define GET_x888_ALPHA(s) 0xff

/* 0565: not actually used since there is no OVER with a 565 source,
 * but the definition is needed for the templates to build. */
#define GET_0565_ALPHA(s) 0xff
/*
 * Template for a nearest-neighbour scanline worker.
 *
 * Expands to 'scanline_func_name (dst, src, w, vx, unit_x, src_width_fixed,
 * fully_transparent_src)', which writes 'w' destination pixels.  For each
 * of them the source pixel at index pixman_fixed_to_int (vx) is fetched and
 * 'vx' is advanced by 'unit_x'.  Only the SRC and OVER operators are
 * implemented; any other OP aborts.  With NORMAL repeat 'vx' is kept
 * negative by subtracting 'src_width_fixed'.  Pixels are handled in pairs,
 * followed by one trailing pixel when 'w' is odd.
 */
#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT,               \
                              src_type_t, dst_type_t, OP, repeat_mode)                  \
static force_inline void                                                                \
scanline_func_name (dst_type_t       *dst,                                              \
                    const src_type_t *src,                                              \
                    int32_t           w,                                                \
                    pixman_fixed_t    vx,                                               \
                    pixman_fixed_t    unit_x,                                           \
                    pixman_fixed_t    src_width_fixed,                                  \
                    pixman_bool_t     fully_transparent_src)                            \
{                                                                                       \
    src_type_t pix_a, pix_b;                                                            \
    uint8_t    alpha_a, alpha_b;                                                        \
    uint32_t   blended;                                                                 \
    int        idx_a, idx_b;                                                            \
                                                                                        \
    /* Only SRC and OVER are implemented by this template */                            \
    if (!(PIXMAN_OP_ ## OP == PIXMAN_OP_SRC || PIXMAN_OP_ ## OP == PIXMAN_OP_OVER))     \
        abort();                                                                        \
                                                                                        \
    /* OVER with a fully transparent source leaves the destination untouched */         \
    if (fully_transparent_src && PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)                    \
        return;                                                                         \
                                                                                        \
    /* Two pixels per iteration; 'w' ends up as -1 when one pixel is left over */       \
    for (w -= 2; w >= 0; w -= 2)                                                        \
    {                                                                                   \
        idx_a = pixman_fixed_to_int (vx);                                               \
        vx += unit_x;                                                                   \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                      \
        {                                                                               \
            /* unit_x is known to be positive, so only this direction is needed */      \
            while (vx >= 0)                                                             \
                vx -= src_width_fixed;                                                  \
        }                                                                               \
        pix_a = src[idx_a];                                                             \
                                                                                        \
        idx_b = pixman_fixed_to_int (vx);                                               \
        vx += unit_x;                                                                   \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                      \
        {                                                                               \
            /* unit_x is known to be positive, so only this direction is needed */      \
            while (vx >= 0)                                                             \
                vx -= src_width_fixed;                                                  \
        }                                                                               \
        pix_b = src[idx_b];                                                             \
                                                                                        \
        if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)                                         \
        {                                                                               \
            alpha_a = GET_ ## SRC_FORMAT ## _ALPHA (pix_a);                             \
            if (alpha_a == 0xff)                                                        \
            {                                                                           \
                *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (pix_a);            \
            }                                                                           \
            else if (pix_a)                                                             \
            {                                                                           \
                blended = convert_ ## DST_FORMAT ## _to_8888 (*dst);                    \
                pix_a = convert_ ## SRC_FORMAT ## _to_8888 (pix_a);                     \
                alpha_a ^= 0xff;                                                        \
                UN8x4_MUL_UN8_ADD_UN8x4 (blended, alpha_a, pix_a);                      \
                *dst = convert_8888_to_ ## DST_FORMAT (blended);                        \
            }                                                                           \
            dst++;                                                                      \
                                                                                        \
            alpha_b = GET_ ## SRC_FORMAT ## _ALPHA (pix_b);                             \
            if (alpha_b == 0xff)                                                        \
            {                                                                           \
                *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (pix_b);            \
            }                                                                           \
            else if (pix_b)                                                             \
            {                                                                           \
                blended = convert_ ## DST_FORMAT ## _to_8888 (*dst);                    \
                pix_b = convert_ ## SRC_FORMAT ## _to_8888 (pix_b);                     \
                alpha_b ^= 0xff;                                                        \
                UN8x4_MUL_UN8_ADD_UN8x4 (blended, alpha_b, pix_b);                      \
                *dst = convert_8888_to_ ## DST_FORMAT (blended);                        \
            }                                                                           \
            dst++;                                                                      \
        }                                                                               \
        else /* PIXMAN_OP_SRC */                                                        \
        {                                                                               \
            *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (pix_a);              \
            *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (pix_b);              \
        }                                                                               \
    }                                                                                   \
                                                                                        \
    /* Trailing pixel for odd widths */                                                 \
    if (w & 1)                                                                          \
    {                                                                                   \
        idx_a = pixman_fixed_to_int (vx);                                               \
        pix_a = src[idx_a];                                                             \
                                                                                        \
        if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)                                         \
        {                                                                               \
            alpha_a = GET_ ## SRC_FORMAT ## _ALPHA (pix_a);                             \
            if (alpha_a == 0xff)                                                        \
            {                                                                           \
                *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (pix_a);            \
            }                                                                           \
            else if (pix_a)                                                             \
            {                                                                           \
                blended = convert_ ## DST_FORMAT ## _to_8888 (*dst);                    \
                pix_a = convert_ ## SRC_FORMAT ## _to_8888 (pix_a);                     \
                alpha_a ^= 0xff;                                                        \
                UN8x4_MUL_UN8_ADD_UN8x4 (blended, alpha_a, pix_a);                      \
                *dst = convert_8888_to_ ## DST_FORMAT (blended);                        \
            }                                                                           \
            dst++;                                                                      \
        }                                                                               \
        else /* PIXMAN_OP_SRC */                                                        \
        {                                                                               \
            *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (pix_a);              \
        }                                                                               \
    }                                                                                   \
}
/*
 * Main loop template for nearest-neighbour scaled compositing.
 *
 * Expands to a function named
 * fast_composite_scaled_nearest<scale_func_name> (imp, info) that walks the
 * destination rows and, for each of them, calls 'scanline_func' as
 *
 *     scanline_func (mask, dst, src, w, vx, unit_x, src_width_fixed,
 *                    fully_transparent_src)
 *
 * Template parameters:
 *   scale_func_name - suffix pasted onto the generated function name
 *   scanline_func   - per-scanline worker
 *   src_type_t, mask_type_t, dst_type_t - pixel types of the three images
 *   repeat_mode     - pasted onto PIXMAN_REPEAT_ and compared against
 *                     NORMAL, PAD and NONE; anything else takes the plain
 *                     (no repeat handling) branch
 *   have_mask       - non-zero when a mask image is used
 *   mask_is_solid   - non-zero when that mask is fetched as one solid value
 *
 * The source pointer handed to the worker points one image width past the
 * start of the row and 'vx' is biased by -src_width_fixed, so the worker
 * indexes the row with negative offsets.
 */
445 #define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
446 dst_type_t, repeat_mode, have_mask, mask_is_solid) \
447 static void \
448 fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \
449 pixman_composite_info_t *info) \
450 { \
451 PIXMAN_COMPOSITE_ARGS (info); \
452 dst_type_t *dst_line; \
453 mask_type_t *mask_line; \
454 src_type_t *src_first_line; \
455 int y; \
456 pixman_fixed_t src_width_fixed = pixman_int_to_fixed (src_image->bits.width); \
457 pixman_fixed_t max_vy; \
458 pixman_vector_t v; \
459 pixman_fixed_t vx, vy; \
460 pixman_fixed_t unit_x, unit_y; \
461 int32_t left_pad, right_pad; \
462 \
463 src_type_t *src; \
464 dst_type_t *dst; \
465 mask_type_t solid_mask; \
466 const mask_type_t *mask = &solid_mask; \
467 int src_stride, mask_stride, dst_stride; \
468 \
469 PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
470 if (have_mask) \
471 { \
472 if (mask_is_solid) \
473 solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
474 else \
475 PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
476 mask_stride, mask_line, 1); \
477 } \
478 /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
479 * transformed from destination space to source space */ \
480 PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
481 \
482 /* reference point is the center of the pixel */ \
483 v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
484 v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
485 v.vector[2] = pixman_fixed_1; \
486 \
487 if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
488 return; \
489 \
/* Per-pixel and per-row source steps are the diagonal of the transform */ \
490 unit_x = src_image->common.transform->matrix[0][0]; \
491 unit_y = src_image->common.transform->matrix[1][1]; \
492 \
493 /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
494 v.vector[0] -= pixman_fixed_e; \
495 v.vector[1] -= pixman_fixed_e; \
496 \
497 vx = v.vector[0]; \
498 vy = v.vector[1]; \
499 \
500 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
501 { \
502 max_vy = pixman_int_to_fixed (src_image->bits.height); \
503 \
504 /* Clamp repeating positions inside the actual samples */ \
505 repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
506 repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
507 } \
508 \
/* PAD and NONE: split the scanline into left padding, image pixels \
 * and right padding, then skip vx over the left padding */ \
509 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
510 PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
511 { \
512 pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
513 &width, &left_pad, &right_pad); \
514 vx += left_pad * unit_x; \
515 } \
516 \
517 while (--height >= 0) \
518 { \
519 dst = dst_line; \
520 dst_line += dst_stride; \
521 if (have_mask && !mask_is_solid) \
522 { \
523 mask = mask_line; \
524 mask_line += mask_stride; \
525 } \
526 \
527 y = pixman_fixed_to_int (vy); \
528 vy += unit_y; \
529 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
530 repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
531 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
532 { \
533 repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
534 src = src_first_line + src_stride * y; \
/* Padding runs use unit_x == 0, so a single source pixel is replicated: \
 * presumably the first pixel of the row on the left and the last one on \
 * the right (assumes pixman_fixed_to_int (-pixman_fixed_e) is -1) */ \
535 if (left_pad > 0) \
536 { \
537 scanline_func (mask, dst, \
538 src + src_image->bits.width - src_image->bits.width + 1, \
539 left_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \
540 } \
541 if (width > 0) \
542 { \
543 scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
544 dst + left_pad, src + src_image->bits.width, width, \
545 vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \
546 } \
547 if (right_pad > 0) \
548 { \
549 scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
550 dst + left_pad + width, src + src_image->bits.width, \
551 right_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \
552 } \
553 } \
554 else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
555 { \
/* Everything outside the image is fed from a single zero pixel and \
 * flagged as fully transparent source */ \
556 static const src_type_t zero[1] = { 0 }; \
557 if (y < 0 || y >= src_image->bits.height) \
558 { \
559 scanline_func (mask, dst, zero + 1, left_pad + width + right_pad, \
560 -pixman_fixed_e, 0, src_width_fixed, TRUE); \
561 continue; \
562 } \
563 src = src_first_line + src_stride * y; \
564 if (left_pad > 0) \
565 { \
566 scanline_func (mask, dst, zero + 1, left_pad, \
567 -pixman_fixed_e, 0, src_width_fixed, TRUE); \
568 } \
569 if (width > 0) \
570 { \
571 scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
572 dst + left_pad, src + src_image->bits.width, width, \
573 vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \
574 } \
575 if (right_pad > 0) \
576 { \
577 scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
578 dst + left_pad + width, zero + 1, right_pad, \
579 -pixman_fixed_e, 0, src_width_fixed, TRUE); \
580 } \
581 } \
582 else \
583 { \
/* NORMAL and 'cover': the whole scanline is fetched from the image row */ \
584 src = src_first_line + src_stride * y; \
585 scanline_func (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed, \
586 unit_x, src_width_fixed, FALSE); \
587 } \
588 } \
589 }
591 /* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
/* Forwards every argument to FAST_NEAREST_MAINLOOP_INT, pasting an
 * underscore in front of 'scale_func_name', so the generated function is
 * named fast_composite_scaled_nearest_<scale_func_name>.
 */
592 #define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
593 dst_type_t, repeat_mode, have_mask, mask_is_solid) \
594 FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \
595 dst_type_t, repeat_mode, have_mask, mask_is_solid)
/*
 * Main loop for scanline functions that take no mask argument.
 *
 * Defines an adapter named <scanline_func><scale_func_name>_wrapper that
 * accepts (and ignores) a 'mask' pointer and forwards the remaining
 * arguments to 'scanline_func', then instantiates FAST_NEAREST_MAINLOOP_INT
 * with that adapter, a uint8_t mask type and have_mask/mask_is_solid both
 * FALSE.  'scale_func_name' is passed on unchanged, so no underscore is
 * added here.
 */
597 #define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \
598 repeat_mode) \
599 static force_inline void \
600 scanline_func##scale_func_name##_wrapper ( \
601 const uint8_t *mask, \
602 dst_type_t *dst, \
603 const src_type_t *src, \
604 int32_t w, \
605 pixman_fixed_t vx, \
606 pixman_fixed_t unit_x, \
607 pixman_fixed_t max_vx, \
608 pixman_bool_t fully_transparent_src) \
609 { \
610 scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \
611 } \
612 FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \
613 src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
/* Mask-less main loop: pastes an underscore in front of 'scale_func_name'
 * and forwards to FAST_NEAREST_MAINLOOP_NOMASK.
 */
615 #define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
616 repeat_mode) \
617 FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \
618 dst_type_t, repeat_mode)
/*
 * Instantiates a complete mask-less nearest scaler for one
 * format/operator/repeat combination:
 *   - the scanline worker scaled_nearest_scanline_<scale_func_name>_<OP>
 *     (via FAST_NEAREST_SCANLINE)
 *   - the main loop fast_composite_scaled_nearest_<scale_func_name>_<OP>
 *     (via FAST_NEAREST_MAINLOOP_NOMASK)
 */
620 #define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
621 src_type_t, dst_type_t, OP, repeat_mode) \
622 FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
623 SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
624 OP, repeat_mode) \
625 FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \
626 scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
627 src_type_t, dst_type_t, repeat_mode)
/* Source image flags shared by every scaled nearest fast path entry below */
#define SCALED_NEAREST_FLAGS                    \
    (  FAST_PATH_SCALE_TRANSFORM                \
     | FAST_PATH_NO_ALPHA_MAP                   \
     | FAST_PATH_NEAREST_FILTER                 \
     | FAST_PATH_NO_ACCESSORS                   \
     | FAST_PATH_NARROW_FORMAT)
/*
 * Fast path table entries for scaled nearest compositing without a mask.
 * Each macro expands to one brace-enclosed initializer:
 *   { operator, source format, source flags, mask format, mask flags,
 *     destination format, destination flags, composite function }
 * The composite function is fast_composite_scaled_nearest_<func>_<repeat>_<op>.
 * The NORMAL, PAD and NONE variants require a positive x unit; the COVER
 * variant requires FAST_PATH_SAMPLES_COVER_CLIP_NEAREST instead.
 */
#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func)                            \
    {                                                                           \
        PIXMAN_OP_ ## op,                                                       \
        PIXMAN_ ## s,                                                           \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null,                                                            \
        0,                                                                      \
        PIXMAN_ ## d,                                                           \
        FAST_PATH_STD_DEST_FLAGS,                                               \
        fast_composite_scaled_nearest_ ## func ## _normal_ ## op                \
    }

#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func)                               \
    {                                                                           \
        PIXMAN_OP_ ## op,                                                       \
        PIXMAN_ ## s,                                                           \
        (SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null,                                                            \
        0,                                                                      \
        PIXMAN_ ## d,                                                           \
        FAST_PATH_STD_DEST_FLAGS,                                               \
        fast_composite_scaled_nearest_ ## func ## _pad_ ## op                   \
    }

#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func)                              \
    {                                                                           \
        PIXMAN_OP_ ## op,                                                       \
        PIXMAN_ ## s,                                                           \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null,                                                            \
        0,                                                                      \
        PIXMAN_ ## d,                                                           \
        FAST_PATH_STD_DEST_FLAGS,                                               \
        fast_composite_scaled_nearest_ ## func ## _none_ ## op                  \
    }

#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func)                             \
    {                                                                           \
        PIXMAN_OP_ ## op,                                                       \
        PIXMAN_ ## s,                                                           \
        (SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST),          \
        PIXMAN_null,                                                            \
        0,                                                                      \
        PIXMAN_ ## d,                                                           \
        FAST_PATH_STD_DEST_FLAGS,                                               \
        fast_composite_scaled_nearest_ ## func ## _cover_ ## op                 \
    }
/*
 * Fast path table entries for scaled nearest compositing with an a8 mask.
 * Same layout as the mask-less entries, except that the mask format is
 * PIXMAN_a8 and the mask flags are MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA).
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)                    \
    {                                                                           \
        PIXMAN_OP_ ## op,                                                       \
        PIXMAN_ ## s,                                                           \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_a8,                                                              \
        MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),                               \
        PIXMAN_ ## d,                                                           \
        FAST_PATH_STD_DEST_FLAGS,                                               \
        fast_composite_scaled_nearest_ ## func ## _normal_ ## op                \
    }

#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func)                       \
    {                                                                           \
        PIXMAN_OP_ ## op,                                                       \
        PIXMAN_ ## s,                                                           \
        (SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_a8,                                                              \
        MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),                               \
        PIXMAN_ ## d,                                                           \
        FAST_PATH_STD_DEST_FLAGS,                                               \
        fast_composite_scaled_nearest_ ## func ## _pad_ ## op                   \
    }

#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func)                      \
    {                                                                           \
        PIXMAN_OP_ ## op,                                                       \
        PIXMAN_ ## s,                                                           \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_a8,                                                              \
        MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),                               \
        PIXMAN_ ## d,                                                           \
        FAST_PATH_STD_DEST_FLAGS,                                               \
        fast_composite_scaled_nearest_ ## func ## _none_ ## op                  \
    }

#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func)                     \
    {                                                                           \
        PIXMAN_OP_ ## op,                                                       \
        PIXMAN_ ## s,                                                           \
        (SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST),          \
        PIXMAN_a8,                                                              \
        MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),                               \
        PIXMAN_ ## d,                                                           \
        FAST_PATH_STD_DEST_FLAGS,                                               \
        fast_composite_scaled_nearest_ ## func ## _cover_ ## op                 \
    }
/*
 * Fast path table entries for scaled nearest compositing with a solid mask.
 * Same layout as the mask-less entries, except that the mask format is
 * PIXMAN_solid and the mask flags are
 * MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA).
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)                 \
    {                                                                           \
        PIXMAN_OP_ ## op,                                                       \
        PIXMAN_ ## s,                                                           \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_solid,                                                           \
        MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),                            \
        PIXMAN_ ## d,                                                           \
        FAST_PATH_STD_DEST_FLAGS,                                               \
        fast_composite_scaled_nearest_ ## func ## _normal_ ## op                \
    }

#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)                    \
    {                                                                           \
        PIXMAN_OP_ ## op,                                                       \
        PIXMAN_ ## s,                                                           \
        (SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_solid,                                                           \
        MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),                            \
        PIXMAN_ ## d,                                                           \
        FAST_PATH_STD_DEST_FLAGS,                                               \
        fast_composite_scaled_nearest_ ## func ## _pad_ ## op                   \
    }

#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)                   \
    {                                                                           \
        PIXMAN_OP_ ## op,                                                       \
        PIXMAN_ ## s,                                                           \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_solid,                                                           \
        MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),                            \
        PIXMAN_ ## d,                                                           \
        FAST_PATH_STD_DEST_FLAGS,                                               \
        fast_composite_scaled_nearest_ ## func ## _none_ ## op                  \
    }

#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)                  \
    {                                                                           \
        PIXMAN_OP_ ## op,                                                       \
        PIXMAN_ ## s,                                                           \
        (SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST),          \
        PIXMAN_solid,                                                           \
        MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),                            \
        PIXMAN_ ## d,                                                           \
        FAST_PATH_STD_DEST_FLAGS,                                               \
        fast_composite_scaled_nearest_ ## func ## _cover_ ## op                 \
    }
/*
 * Convenience macros that expand to a comma-separated list of fast path
 * table entries for one operator/format combination, one entry per repeat
 * variant.  The entries are listed with the 'cover' variant first.
 */
763 /* Prefer the use of 'cover' variant, because it is faster */
764 #define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
765 SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
766 SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
767 SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
768 SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
/* a8 mask: COVER, NONE and PAD entries only; no NORMAL entry is emitted */
770 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
771 SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
772 SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
773 SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
/* solid mask: COVER, NONE and PAD entries only; no NORMAL entry is emitted */
775 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \
776 SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
777 SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
778 SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
780 /*****************************************************************************/
782 /*
783 * Identify 5 zones in each scanline for bilinear scaling. Depending on
784 * whether 2 pixels to be interpolated are fetched from the image itself,
785 * from the padding area around it or from both image and padding area.
786 */
787 static force_inline void
788 bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width,
789 pixman_fixed_t vx,
790 pixman_fixed_t unit_x,
791 int32_t * left_pad,
792 int32_t * left_tz,
793 int32_t * width,
794 int32_t * right_tz,
795 int32_t * right_pad)
796 {
797 int width1 = *width, left_pad1, right_pad1;
798 int width2 = *width, left_pad2, right_pad2;
800 pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
801 &width1, &left_pad1, &right_pad1);
802 pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
803 unit_x, &width2, &left_pad2, &right_pad2);
805 *left_pad = left_pad2;
806 *left_tz = left_pad1 - left_pad2;
807 *right_tz = right_pad2 - right_pad1;
808 *right_pad = right_pad1;
809 *width -= *left_pad + *left_tz + *right_tz + *right_pad;
810 }
812 /*
813 * Main loop template for single pass bilinear scaling. It needs to be
814 * provided with 'scanline_func' which should do the compositing operation.
815 * The needed function has the following prototype:
816 *
817 * scanline_func (dst_type_t * dst,
818 * const mask_type_t * mask,
819 * const src_type_t * src_top,
820 * const src_type_t * src_bottom,
821 * int32_t width,
822 * int weight_top,
823 * int weight_bottom,
824 * pixman_fixed_t vx,
825 * pixman_fixed_t unit_x,
826 * pixman_fixed_t max_vx,
827 * pixman_bool_t zero_src)
828 *
829 * Where:
830 * dst - destination scanline buffer for storing results
831 * mask - mask buffer (or single value for solid mask)
832 * src_top, src_bottom - two source scanlines
833 * width - number of pixels to process
834 * weight_top - weight of the top row for interpolation
835 * weight_bottom - weight of the bottom row for interpolation
836 * vx - initial position for fetching the first pair of
837 * pixels from the source buffer
838 * unit_x - position increment needed to move to the next pair
839 * of pixels
840 * max_vx - image size as a fixed point value, can be used for
841 * implementing NORMAL repeat (when it is supported)
842 * zero_src - boolean hint variable, which is set to TRUE when
843 * all source pixels are fetched from zero padding
844 * zone for NONE repeat
845 *
846 * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to
847 * BILINEAR_INTERPOLATION_RANGE, but sometimes it may be less than that
848 * for NONE repeat when handling fuzzy antialiased top or bottom image
849 * edges. Also both top and bottom weight variables are guaranteed to
850 * have value, which is less than BILINEAR_INTERPOLATION_RANGE.
851 * For example, the weights can fit into unsigned byte or be used
852 * with 8-bit SIMD multiplication instructions for 8-bit interpolation
853 * precision.
854 */
/* Replaces a single "scanline_func" with "fetch_func" & "op_func" to allow
 * optional two stage processing:
 *
 *   - op_func != NULL: "fetch_func" writes the bilinear fetch result into
 *     "scanline_buf", then "op_func" (dst, mask, scanline_buf, width) does
 *     the unscaled combine
 *   - op_func == NULL: "fetch_func" writes straight into "dst" (the old
 *     single pass behavior)
 *
 * "op_func" is cast to void (*)(dst_type_t *, const mask_type_t *,
 * const src_type_t *, int) before the call.  This is ugly and gcc issues
 * some warnings, but it works.
 *
 * Every argument that is an expression is parenthesised before it is cast
 * or compared, so that a cast always applies to the whole argument.
 *
 * A word of advice: clang has much better error reporting than gcc for
 * deeply nested macros.
 */
#define scanline_func(dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst,    \
                      scanline_buf, mask, src_top, src_bottom, width,                   \
                      weight_top, weight_bottom, vx, unit_x, max_vx, zero_src)          \
    do {                                                                                \
        if ((op_func) != NULL)                                                          \
        {                                                                               \
            fetch_func ((void *)(scanline_buf), (mask), (src_top), (src_bottom),        \
                        (width), (weight_top), (weight_bottom), (vx), (unit_x),         \
                        (max_vx), (zero_src));                                          \
            ((void (*)(dst_type_t *, const mask_type_t *, const src_type_t *, int))     \
                (op_func)) ((dst), (mask), (src_type_t *)(scanline_buf), (width));      \
        }                                                                               \
        else                                                                            \
        {                                                                               \
            fetch_func ((void *)(dst), (mask), (src_top), (src_bottom), (width),        \
                        (weight_top), (weight_bottom), (vx), (unit_x), (max_vx),        \
                        (zero_src));                                                    \
        }                                                                               \
    } while (0)
/* Number of uint64_t elements in the on-stack scanline buffer declared by
 * the bilinear main loop below.  That loop allocates a heap buffer instead
 * when a scanline of source pixels does not fit.
 */
883 #define SCANLINE_BUFFER_LENGTH 3072
885 #define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, fetch_func, op_func, src_type_t, \
886 mask_type_t, dst_type_t, repeat_mode, flags) \
887 static void \
888 fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \
889 pixman_composite_info_t *info) \
890 { \
891 PIXMAN_COMPOSITE_ARGS (info); \
892 dst_type_t *dst_line; \
893 mask_type_t *mask_line; \
894 src_type_t *src_first_line; \
895 int y1, y2; \
896 pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
897 pixman_vector_t v; \
898 pixman_fixed_t vx, vy; \
899 pixman_fixed_t unit_x, unit_y; \
900 int32_t left_pad, left_tz, right_tz, right_pad; \
901 \
902 dst_type_t *dst; \
903 mask_type_t solid_mask; \
904 const mask_type_t *mask = &solid_mask; \
905 int src_stride, mask_stride, dst_stride; \
906 \
907 int src_width; \
908 pixman_fixed_t src_width_fixed; \
909 int max_x; \
910 pixman_bool_t need_src_extension; \
911 \
912 uint64_t stack_scanline_buffer[SCANLINE_BUFFER_LENGTH]; \
913 uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer; \
914 \
915 PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
916 if (flags & FLAG_HAVE_SOLID_MASK) \
917 { \
918 solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
919 mask_stride = 0; \
920 } \
921 else if (flags & FLAG_HAVE_NON_SOLID_MASK) \
922 { \
923 PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
924 mask_stride, mask_line, 1); \
925 } \
926 \
927 /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
928 * transformed from destination space to source space */ \
929 PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
930 \
931 /* reference point is the center of the pixel */ \
932 v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
933 v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
934 v.vector[2] = pixman_fixed_1; \
935 \
936 if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
937 return; \
938 \
939 unit_x = src_image->common.transform->matrix[0][0]; \
940 unit_y = src_image->common.transform->matrix[1][1]; \
941 \
942 v.vector[0] -= pixman_fixed_1 / 2; \
943 v.vector[1] -= pixman_fixed_1 / 2; \
944 \
945 vy = v.vector[1]; \
946 \
947 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
948 PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
949 { \
950 bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \
951 &left_pad, &left_tz, &width, &right_tz, &right_pad); \
952 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
953 { \
954 /* PAD repeat does not need special handling for 'transition zones' and */ \
955 /* they can be combined with 'padding zones' safely */ \
956 left_pad += left_tz; \
957 right_pad += right_tz; \
958 left_tz = right_tz = 0; \
959 } \
960 v.vector[0] += left_pad * unit_x; \
961 } \
962 \
963 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
964 { \
965 vx = v.vector[0]; \
966 repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width)); \
967 max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1; \
968 \
969 if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH) \
970 { \
971 src_width = 0; \
972 \
973 while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x) \
974 src_width += src_image->bits.width; \
975 \
976 need_src_extension = TRUE; \
977 } \
978 else \
979 { \
980 src_width = src_image->bits.width; \
981 need_src_extension = FALSE; \
982 } \
983 \
984 src_width_fixed = pixman_int_to_fixed (src_width); \
985 } \
986 \
987 if (op_func != NULL && width * sizeof(src_type_t) > sizeof(stack_scanline_buffer)) \
988 { \
989 scanline_buffer = pixman_malloc_ab (width, sizeof(src_type_t)); \
990 \
991 if (!scanline_buffer) \
992 return; \
993 } \
994 \
995 while (--height >= 0) \
996 { \
997 int weight1, weight2; \
998 dst = dst_line; \
999 dst_line += dst_stride; \
1000 vx = v.vector[0]; \
1001 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1002 { \
1003 mask = mask_line; \
1004 mask_line += mask_stride; \
1005 } \
1006 \
1007 y1 = pixman_fixed_to_int (vy); \
1008 weight2 = pixman_fixed_to_bilinear_weight (vy); \
1009 if (weight2) \
1010 { \
1011 /* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */ \
1012 y2 = y1 + 1; \
1013 weight1 = BILINEAR_INTERPOLATION_RANGE - weight2; \
1014 } \
1015 else \
1016 { \
1017 /* set both top and bottom row to the same scanline and tweak weights */ \
1018 y2 = y1; \
1019 weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2; \
1020 } \
1021 vy += unit_y; \
1022 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
1023 { \
1024 src_type_t *src1, *src2; \
1025 src_type_t buf1[2]; \
1026 src_type_t buf2[2]; \
1027 repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \
1028 repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \
1029 src1 = src_first_line + src_stride * y1; \
1030 src2 = src_first_line + src_stride * y2; \
1031 \
1032 if (left_pad > 0) \
1033 { \
1034 buf1[0] = buf1[1] = src1[0]; \
1035 buf2[0] = buf2[1] = src2[0]; \
1036 scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
1037 scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2, \
1038 0, 0, 0, FALSE); \
1039 dst += left_pad; \
1040 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1041 mask += left_pad; \
1042 } \
1043 if (width > 0) \
1044 { \
1045 scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
1046 scanline_buffer, mask, src1, src2, width, weight1, weight2, \
1047 vx, unit_x, 0, FALSE); \
1048 dst += width; \
1049 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1050 mask += width; \
1051 } \
1052 if (right_pad > 0) \
1053 { \
1054 buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \
1055 buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \
1056 scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
1057 scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2, \
1058 0, 0, 0, FALSE); \
1059 } \
1060 } \
1061 else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
1062 { \
1063 src_type_t *src1, *src2; \
1064 src_type_t buf1[2]; \
1065 src_type_t buf2[2]; \
1066 /* handle top/bottom zero padding by just setting weights to 0 if needed */ \
1067 if (y1 < 0) \
1068 { \
1069 weight1 = 0; \
1070 y1 = 0; \
1071 } \
1072 if (y1 >= src_image->bits.height) \
1073 { \
1074 weight1 = 0; \
1075 y1 = src_image->bits.height - 1; \
1076 } \
1077 if (y2 < 0) \
1078 { \
1079 weight2 = 0; \
1080 y2 = 0; \
1081 } \
1082 if (y2 >= src_image->bits.height) \
1083 { \
1084 weight2 = 0; \
1085 y2 = src_image->bits.height - 1; \
1086 } \
1087 src1 = src_first_line + src_stride * y1; \
1088 src2 = src_first_line + src_stride * y2; \
1089 \
1090 if (left_pad > 0) \
1091 { \
1092 buf1[0] = buf1[1] = 0; \
1093 buf2[0] = buf2[1] = 0; \
1094 scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
1095 scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2, \
1096 0, 0, 0, TRUE); \
1097 dst += left_pad; \
1098 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1099 mask += left_pad; \
1100 } \
1101 if (left_tz > 0) \
1102 { \
1103 buf1[0] = 0; \
1104 buf1[1] = src1[0]; \
1105 buf2[0] = 0; \
1106 buf2[1] = src2[0]; \
1107 scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
1108 scanline_buffer, mask, buf1, buf2, left_tz, weight1, weight2, \
1109 pixman_fixed_frac (vx), unit_x, 0, FALSE); \
1110 dst += left_tz; \
1111 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1112 mask += left_tz; \
1113 vx += left_tz * unit_x; \
1114 } \
1115 if (width > 0) \
1116 { \
1117 scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
1118 scanline_buffer, mask, src1, src2, width, weight1, weight2, \
1119 vx, unit_x, 0, FALSE); \
1120 dst += width; \
1121 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1122 mask += width; \
1123 vx += width * unit_x; \
1124 } \
1125 if (right_tz > 0) \
1126 { \
1127 buf1[0] = src1[src_image->bits.width - 1]; \
1128 buf1[1] = 0; \
1129 buf2[0] = src2[src_image->bits.width - 1]; \
1130 buf2[1] = 0; \
1131 scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
1132 scanline_buffer, mask, buf1, buf2, right_tz, weight1, weight2, \
1133 pixman_fixed_frac (vx), unit_x, 0, FALSE); \
1134 dst += right_tz; \
1135 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1136 mask += right_tz; \
1137 } \
1138 if (right_pad > 0) \
1139 { \
1140 buf1[0] = buf1[1] = 0; \
1141 buf2[0] = buf2[1] = 0; \
1142 scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
1143 scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2, \
1144 0, 0, 0, TRUE); \
1145 } \
1146 } \
1147 else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
1148 { \
1149 int32_t num_pixels; \
1150 int32_t width_remain; \
1151 src_type_t * src_line_top; \
1152 src_type_t * src_line_bottom; \
1153 src_type_t buf1[2]; \
1154 src_type_t buf2[2]; \
1155 src_type_t extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2]; \
1156 src_type_t extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2]; \
1157 int i, j; \
1158 \
1159 repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); \
1160 repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); \
1161 src_line_top = src_first_line + src_stride * y1; \
1162 src_line_bottom = src_first_line + src_stride * y2; \
1163 \
1164 if (need_src_extension) \
1165 { \
1166 for (i=0; i<src_width;) \
1167 { \
1168 for (j=0; j<src_image->bits.width; j++, i++) \
1169 { \
1170 extended_src_line0[i] = src_line_top[j]; \
1171 extended_src_line1[i] = src_line_bottom[j]; \
1172 } \
1173 } \
1174 \
1175 src_line_top = &extended_src_line0[0]; \
1176 src_line_bottom = &extended_src_line1[0]; \
1177 } \
1178 \
1179 /* Top & Bottom wrap around buffer */ \
1180 buf1[0] = src_line_top[src_width - 1]; \
1181 buf1[1] = src_line_top[0]; \
1182 buf2[0] = src_line_bottom[src_width - 1]; \
1183 buf2[1] = src_line_bottom[0]; \
1184 \
1185 width_remain = width; \
1186 \
1187 while (width_remain > 0) \
1188 { \
1189 /* We use src_width_fixed because it can make vx in original source range */ \
1190 repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
1191 \
1192 /* Wrap around part */ \
1193 if (pixman_fixed_to_int (vx) == src_width - 1) \
1194 { \
1195 /* for positive unit_x \
1196 * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed \
1197 * \
1198 * vx is in range [0, src_width_fixed - pixman_fixed_e] \
1199 * So we are safe from overflow. \
1200 */ \
1201 num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1; \
1202 \
1203 if (num_pixels > width_remain) \
1204 num_pixels = width_remain; \
1205 \
1206 scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, \
1207 dst, scanline_buffer, mask, buf1, buf2, num_pixels, \
1208 weight1, weight2, pixman_fixed_frac(vx), \
1209 unit_x, src_width_fixed, FALSE); \
1210 \
1211 width_remain -= num_pixels; \
1212 vx += num_pixels * unit_x; \
1213 dst += num_pixels; \
1214 \
1215 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1216 mask += num_pixels; \
1217 \
1218 repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
1219 } \
1220 \
1221 /* Normal scanline composite */ \
1222 if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0) \
1223 { \
1224 /* for positive unit_x \
1225 * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1) \
1226 * \
1227 * vx is in range [0, src_width_fixed - pixman_fixed_e] \
1228 * So we are safe from overflow here. \
1229 */ \
1230 num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e) \
1231 / unit_x) + 1; \
1232 \
1233 if (num_pixels > width_remain) \
1234 num_pixels = width_remain; \
1235 \
1236 scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, \
1237 dst, scanline_buffer, mask, src_line_top, src_line_bottom, \
1238 num_pixels, weight1, weight2, vx, unit_x, src_width_fixed, \
1239 FALSE); \
1240 \
1241 width_remain -= num_pixels; \
1242 vx += num_pixels * unit_x; \
1243 dst += num_pixels; \
1244 \
1245 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1246 mask += num_pixels; \
1247 } \
1248 } \
1249 } \
1250 else \
1251 { \
1252 scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
1253 scanline_buffer, mask, \
1254 src_first_line + src_stride * y1, \
1255 src_first_line + src_stride * y2, width, \
1256 weight1, weight2, vx, unit_x, max_vx, FALSE); \
1257 } \
1258 } \
1259 if (scanline_buffer != (uint8_t *) stack_scanline_buffer) \
1260 free (scanline_buffer); \
1261 }
/*
 * Indirection layer kept as a workaround for old Sun Studio compilers, see:
 * https://bugs.freedesktop.org/show_bug.cgi?id=32764
 *
 * Pastes a leading underscore onto the function-name suffix and hands every
 * other argument, unchanged and in the same order, to
 * FAST_BILINEAR_MAINLOOP_INT.
 */
#define FAST_BILINEAR_MAINLOOP_COMMON(name_suffix, fetch_fn, combine_fn, \
                                      src_pixel_t, mask_pixel_t, \
                                      dst_pixel_t, repeat_kind, flag_bits) \
    FAST_BILINEAR_MAINLOOP_INT(_ ## name_suffix, fetch_fn, combine_fn, \
                               src_pixel_t, mask_pixel_t, \
                               dst_pixel_t, repeat_kind, flag_bits)
/*
 * Flag bits shared by every scaled bilinear fast path entry defined below:
 * the bitwise OR of the scale-transform, no-alpha-map, bilinear-filter,
 * no-accessors and narrow-format FAST_PATH_* constants.
 */
#define SCALED_BILINEAR_FLAGS \
    (FAST_PATH_NARROW_FORMAT | FAST_PATH_NO_ACCESSORS | \
     FAST_PATH_BILINEAR_FILTER | FAST_PATH_NO_ALPHA_MAP | \
     FAST_PATH_SCALE_TRANSFORM)
/*
 * Table entry for an unmasked scaled bilinear operation with PAD repeat.
 *
 * Expands to a brace-enclosed initializer with eight fields, in order:
 * PIXMAN_OP_<oper>, PIXMAN_<srcfmt>, the flag set built below, PIXMAN_null,
 * 0, PIXMAN_<dstfmt>, FAST_PATH_STD_DEST_FLAGS and the function
 * fast_composite_scaled_bilinear_<name>_pad_<oper>.
 *
 * The flag set is SCALED_BILINEAR_FLAGS plus FAST_PATH_PAD_REPEAT and
 * FAST_PATH_X_UNIT_POSITIVE.  The struct being initialized is declared
 * elsewhere (presumably pixman_fast_path_t -- confirm).
 */
#define SIMPLE_BILINEAR_FAST_PATH_PAD(oper, srcfmt, dstfmt, name) \
    { \
        PIXMAN_OP_ ## oper, \
        PIXMAN_ ## srcfmt, \
        (FAST_PATH_X_UNIT_POSITIVE | FAST_PATH_PAD_REPEAT | \
         SCALED_BILINEAR_FLAGS), \
        PIXMAN_null, \
        0, \
        PIXMAN_ ## dstfmt, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## name ## _pad_ ## oper, \
    }
/*
 * Table entry for an unmasked scaled bilinear operation with NONE repeat.
 *
 * Expands to a brace-enclosed initializer with eight fields, in order:
 * PIXMAN_OP_<oper>, PIXMAN_<srcfmt>, the flag set built below, PIXMAN_null,
 * 0, PIXMAN_<dstfmt>, FAST_PATH_STD_DEST_FLAGS and the function
 * fast_composite_scaled_bilinear_<name>_none_<oper>.
 *
 * The flag set is SCALED_BILINEAR_FLAGS plus FAST_PATH_NONE_REPEAT and
 * FAST_PATH_X_UNIT_POSITIVE.  The struct being initialized is declared
 * elsewhere (presumably pixman_fast_path_t -- confirm).
 */
#define SIMPLE_BILINEAR_FAST_PATH_NONE(oper, srcfmt, dstfmt, name) \
    { \
        PIXMAN_OP_ ## oper, \
        PIXMAN_ ## srcfmt, \
        (FAST_PATH_X_UNIT_POSITIVE | FAST_PATH_NONE_REPEAT | \
         SCALED_BILINEAR_FLAGS), \
        PIXMAN_null, \
        0, \
        PIXMAN_ ## dstfmt, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## name ## _none_ ## oper, \
    }
/*
 * Table entry for an unmasked scaled bilinear operation, 'cover' variant.
 *
 * Expands to a brace-enclosed initializer with eight fields, in order:
 * PIXMAN_OP_<oper>, PIXMAN_<srcfmt>, the flag set built below, PIXMAN_null,
 * 0, PIXMAN_<dstfmt>, FAST_PATH_STD_DEST_FLAGS and the function
 * fast_composite_scaled_bilinear_<name>_cover_<oper>.
 *
 * The flag set is SCALED_BILINEAR_FLAGS plus
 * FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR; unlike the repeat variants it does
 * not include FAST_PATH_X_UNIT_POSITIVE.  The struct being initialized is
 * declared elsewhere (presumably pixman_fast_path_t -- confirm).
 */
#define SIMPLE_BILINEAR_FAST_PATH_COVER(oper, srcfmt, dstfmt, name) \
    { \
        PIXMAN_OP_ ## oper, \
        PIXMAN_ ## srcfmt, \
        (FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR | SCALED_BILINEAR_FLAGS), \
        PIXMAN_null, \
        0, \
        PIXMAN_ ## dstfmt, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## name ## _cover_ ## oper, \
    }
/*
 * Table entry for an unmasked scaled bilinear operation with NORMAL repeat.
 *
 * Expands to a brace-enclosed initializer with eight fields, in order:
 * PIXMAN_OP_<oper>, PIXMAN_<srcfmt>, the flag set built below, PIXMAN_null,
 * 0, PIXMAN_<dstfmt>, FAST_PATH_STD_DEST_FLAGS and the function
 * fast_composite_scaled_bilinear_<name>_normal_<oper>.
 *
 * The flag set is SCALED_BILINEAR_FLAGS plus FAST_PATH_NORMAL_REPEAT and
 * FAST_PATH_X_UNIT_POSITIVE.  The struct being initialized is declared
 * elsewhere (presumably pixman_fast_path_t -- confirm).
 */
#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(oper, srcfmt, dstfmt, name) \
    { \
        PIXMAN_OP_ ## oper, \
        PIXMAN_ ## srcfmt, \
        (FAST_PATH_X_UNIT_POSITIVE | FAST_PATH_NORMAL_REPEAT | \
         SCALED_BILINEAR_FLAGS), \
        PIXMAN_null, \
        0, \
        PIXMAN_ ## dstfmt, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## name ## _normal_ ## oper, \
    }
/*
 * Table entry for a scaled bilinear operation with an a8 mask, PAD repeat.
 *
 * Expands to a brace-enclosed initializer with eight fields, in order:
 * PIXMAN_OP_<oper>, PIXMAN_<srcfmt>, the flag set built below, PIXMAN_a8,
 * MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), PIXMAN_<dstfmt>,
 * FAST_PATH_STD_DEST_FLAGS and the function
 * fast_composite_scaled_bilinear_<name>_pad_<oper>.
 *
 * The flag set is SCALED_BILINEAR_FLAGS plus FAST_PATH_PAD_REPEAT and
 * FAST_PATH_X_UNIT_POSITIVE.  MASK_FLAGS and the struct being initialized
 * are defined elsewhere (the latter presumably pixman_fast_path_t --
 * confirm).
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(oper, srcfmt, dstfmt, name) \
    { \
        PIXMAN_OP_ ## oper, \
        PIXMAN_ ## srcfmt, \
        (FAST_PATH_X_UNIT_POSITIVE | FAST_PATH_PAD_REPEAT | \
         SCALED_BILINEAR_FLAGS), \
        PIXMAN_a8, \
        MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## dstfmt, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## name ## _pad_ ## oper, \
    }
/*
 * Table entry for a scaled bilinear operation with an a8 mask, NONE repeat.
 *
 * Expands to a brace-enclosed initializer with eight fields, in order:
 * PIXMAN_OP_<oper>, PIXMAN_<srcfmt>, the flag set built below, PIXMAN_a8,
 * MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), PIXMAN_<dstfmt>,
 * FAST_PATH_STD_DEST_FLAGS and the function
 * fast_composite_scaled_bilinear_<name>_none_<oper>.
 *
 * The flag set is SCALED_BILINEAR_FLAGS plus FAST_PATH_NONE_REPEAT and
 * FAST_PATH_X_UNIT_POSITIVE.  MASK_FLAGS and the struct being initialized
 * are defined elsewhere (the latter presumably pixman_fast_path_t --
 * confirm).
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(oper, srcfmt, dstfmt, name) \
    { \
        PIXMAN_OP_ ## oper, \
        PIXMAN_ ## srcfmt, \
        (FAST_PATH_X_UNIT_POSITIVE | FAST_PATH_NONE_REPEAT | \
         SCALED_BILINEAR_FLAGS), \
        PIXMAN_a8, \
        MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## dstfmt, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## name ## _none_ ## oper, \
    }
/*
 * Table entry for a scaled bilinear operation with an a8 mask, 'cover'
 * variant.
 *
 * Expands to a brace-enclosed initializer with eight fields, in order:
 * PIXMAN_OP_<oper>, PIXMAN_<srcfmt>, the flag set built below, PIXMAN_a8,
 * MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), PIXMAN_<dstfmt>,
 * FAST_PATH_STD_DEST_FLAGS and the function
 * fast_composite_scaled_bilinear_<name>_cover_<oper>.
 *
 * The flag set is SCALED_BILINEAR_FLAGS plus
 * FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR; unlike the repeat variants it does
 * not include FAST_PATH_X_UNIT_POSITIVE.  MASK_FLAGS and the struct being
 * initialized are defined elsewhere (the latter presumably
 * pixman_fast_path_t -- confirm).
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(oper, srcfmt, dstfmt, name) \
    { \
        PIXMAN_OP_ ## oper, \
        PIXMAN_ ## srcfmt, \
        (FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR | SCALED_BILINEAR_FLAGS), \
        PIXMAN_a8, \
        MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## dstfmt, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## name ## _cover_ ## oper, \
    }
/*
 * Table entry for a scaled bilinear operation with an a8 mask, NORMAL
 * repeat.
 *
 * Expands to a brace-enclosed initializer with eight fields, in order:
 * PIXMAN_OP_<oper>, PIXMAN_<srcfmt>, the flag set built below, PIXMAN_a8,
 * MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), PIXMAN_<dstfmt>,
 * FAST_PATH_STD_DEST_FLAGS and the function
 * fast_composite_scaled_bilinear_<name>_normal_<oper>.
 *
 * The flag set is SCALED_BILINEAR_FLAGS plus FAST_PATH_NORMAL_REPEAT and
 * FAST_PATH_X_UNIT_POSITIVE.  MASK_FLAGS and the struct being initialized
 * are defined elsewhere (the latter presumably pixman_fast_path_t --
 * confirm).
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(oper, srcfmt, dstfmt, name) \
    { \
        PIXMAN_OP_ ## oper, \
        PIXMAN_ ## srcfmt, \
        (FAST_PATH_X_UNIT_POSITIVE | FAST_PATH_NORMAL_REPEAT | \
         SCALED_BILINEAR_FLAGS), \
        PIXMAN_a8, \
        MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## dstfmt, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## name ## _normal_ ## oper, \
    }
/*
 * Table entry for a scaled bilinear operation with a solid mask, PAD repeat.
 *
 * Expands to a brace-enclosed initializer with eight fields, in order:
 * PIXMAN_OP_<oper>, PIXMAN_<srcfmt>, the flag set built below, PIXMAN_solid,
 * MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), PIXMAN_<dstfmt>,
 * FAST_PATH_STD_DEST_FLAGS and the function
 * fast_composite_scaled_bilinear_<name>_pad_<oper>.
 *
 * The flag set is SCALED_BILINEAR_FLAGS plus FAST_PATH_PAD_REPEAT and
 * FAST_PATH_X_UNIT_POSITIVE.  MASK_FLAGS and the struct being initialized
 * are defined elsewhere (the latter presumably pixman_fast_path_t --
 * confirm).
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(oper, srcfmt, dstfmt, name) \
    { \
        PIXMAN_OP_ ## oper, \
        PIXMAN_ ## srcfmt, \
        (FAST_PATH_X_UNIT_POSITIVE | FAST_PATH_PAD_REPEAT | \
         SCALED_BILINEAR_FLAGS), \
        PIXMAN_solid, \
        MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## dstfmt, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## name ## _pad_ ## oper, \
    }
/*
 * Table entry for a scaled bilinear operation with a solid mask, NONE
 * repeat.
 *
 * Expands to a brace-enclosed initializer with eight fields, in order:
 * PIXMAN_OP_<oper>, PIXMAN_<srcfmt>, the flag set built below, PIXMAN_solid,
 * MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), PIXMAN_<dstfmt>,
 * FAST_PATH_STD_DEST_FLAGS and the function
 * fast_composite_scaled_bilinear_<name>_none_<oper>.
 *
 * The flag set is SCALED_BILINEAR_FLAGS plus FAST_PATH_NONE_REPEAT and
 * FAST_PATH_X_UNIT_POSITIVE.  MASK_FLAGS and the struct being initialized
 * are defined elsewhere (the latter presumably pixman_fast_path_t --
 * confirm).
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(oper, srcfmt, dstfmt, name) \
    { \
        PIXMAN_OP_ ## oper, \
        PIXMAN_ ## srcfmt, \
        (FAST_PATH_X_UNIT_POSITIVE | FAST_PATH_NONE_REPEAT | \
         SCALED_BILINEAR_FLAGS), \
        PIXMAN_solid, \
        MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## dstfmt, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## name ## _none_ ## oper, \
    }
/*
 * Table entry for a scaled bilinear operation with a solid mask, 'cover'
 * variant.
 *
 * Expands to a brace-enclosed initializer with eight fields, in order:
 * PIXMAN_OP_<oper>, PIXMAN_<srcfmt>, the flag set built below, PIXMAN_solid,
 * MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), PIXMAN_<dstfmt>,
 * FAST_PATH_STD_DEST_FLAGS and the function
 * fast_composite_scaled_bilinear_<name>_cover_<oper>.
 *
 * The flag set is SCALED_BILINEAR_FLAGS plus
 * FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR; unlike the repeat variants it does
 * not include FAST_PATH_X_UNIT_POSITIVE.  MASK_FLAGS and the struct being
 * initialized are defined elsewhere (the latter presumably
 * pixman_fast_path_t -- confirm).
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(oper, srcfmt, dstfmt, name) \
    { \
        PIXMAN_OP_ ## oper, \
        PIXMAN_ ## srcfmt, \
        (FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR | SCALED_BILINEAR_FLAGS), \
        PIXMAN_solid, \
        MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## dstfmt, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## name ## _cover_ ## oper, \
    }
/*
 * Table entry for a scaled bilinear operation with a solid mask, NORMAL
 * repeat.
 *
 * Expands to a brace-enclosed initializer with eight fields, in order:
 * PIXMAN_OP_<oper>, PIXMAN_<srcfmt>, the flag set built below, PIXMAN_solid,
 * MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), PIXMAN_<dstfmt>,
 * FAST_PATH_STD_DEST_FLAGS and the function
 * fast_composite_scaled_bilinear_<name>_normal_<oper>.
 *
 * The flag set is SCALED_BILINEAR_FLAGS plus FAST_PATH_NORMAL_REPEAT and
 * FAST_PATH_X_UNIT_POSITIVE.  MASK_FLAGS and the struct being initialized
 * are defined elsewhere (the latter presumably pixman_fast_path_t --
 * confirm).
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(oper, srcfmt, dstfmt, name) \
    { \
        PIXMAN_OP_ ## oper, \
        PIXMAN_ ## srcfmt, \
        (FAST_PATH_X_UNIT_POSITIVE | FAST_PATH_NORMAL_REPEAT | \
         SCALED_BILINEAR_FLAGS), \
        PIXMAN_solid, \
        MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## dstfmt, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## name ## _normal_ ## oper, \
    }
/*
 * Emits the four unmasked bilinear table entries for one operator / format
 * combination as a comma-separated list with no trailing comma.
 *
 * The 'cover' variant is preferred because it is faster, so it is listed
 * ahead of the NONE, PAD and NORMAL repeat variants (presumably the table is
 * scanned in order and the first match wins -- confirm).
 */
#define SIMPLE_BILINEAR_FAST_PATH(oper, srcfmt, dstfmt, name) \
    SIMPLE_BILINEAR_FAST_PATH_COVER (oper, srcfmt, dstfmt, name), \
    SIMPLE_BILINEAR_FAST_PATH_NONE (oper, srcfmt, dstfmt, name), \
    SIMPLE_BILINEAR_FAST_PATH_PAD (oper, srcfmt, dstfmt, name), \
    SIMPLE_BILINEAR_FAST_PATH_NORMAL (oper, srcfmt, dstfmt, name)
/*
 * Emits the four a8-mask bilinear table entries for one operator / format
 * combination as a comma-separated list with no trailing comma.
 *
 * Entry order is COVER, NONE, PAD, NORMAL; all four receive the same
 * arguments.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(oper, srcfmt, dstfmt, name) \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (oper, srcfmt, dstfmt, name), \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (oper, srcfmt, dstfmt, name), \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (oper, srcfmt, dstfmt, name), \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (oper, srcfmt, dstfmt, name)
/*
 * Emits the four solid-mask bilinear table entries for one operator / format
 * combination as a comma-separated list with no trailing comma.
 *
 * Entry order is COVER, NONE, PAD, NORMAL; all four receive the same
 * arguments.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(oper, srcfmt, dstfmt, name) \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (oper, srcfmt, dstfmt, name), \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (oper, srcfmt, dstfmt, name), \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (oper, srcfmt, dstfmt, name), \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (oper, srcfmt, dstfmt, name)
1421 #endif