gfx/cairo/libpixman/src/pixman-fast-path.c

/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
/*
 * Copyright © 2000 SuSE, Inc.
 * Copyright © 2007 Red Hat, Inc.
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of SuSE not be used in advertising or
 * publicity pertaining to distribution of the software without specific,
 * written prior permission. SuSE makes no representations about the
 * suitability of this software for any purpose. It is provided "as is"
 * without express or implied warranty.
 *
 * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Author: Keith Packard, SuSE, Inc.
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <string.h>
#include <stdlib.h>
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-inlines.h"

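/*
 * Helpers for packed 24bpp (0888) pixels. A 24-bit pixel may start at any
 * byte address, so each access is split into an 8-bit and a 16-bit part
 * chosen by address parity, keeping the uint16_t load/store 2-byte aligned.
 */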
static force_inline uint32_t
fetch_24 (uint8_t *a)
{
    if (((uintptr_t)a) & 1)
    {
#ifdef WORDS_BIGENDIAN
        return (*a << 16) | (*(uint16_t *)(a + 1));
#else
        return *a | (*(uint16_t *)(a + 1) << 8);
#endif
    }
    else
    {
#ifdef WORDS_BIGENDIAN
        return (*(uint16_t *)a << 8) | *(a + 2);
#else
        return *(uint16_t *)a | (*(a + 2) << 16);
#endif
    }
}

static force_inline void
store_24 (uint8_t *a,
          uint32_t v)
{
    if (((uintptr_t)a) & 1)
    {
#ifdef WORDS_BIGENDIAN
        *a = (uint8_t) (v >> 16);
        *(uint16_t *)(a + 1) = (uint16_t) (v);
#else
        *a = (uint8_t) (v);
        *(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
#endif
    }
    else
    {
#ifdef WORDS_BIGENDIAN
        *(uint16_t *)a = (uint16_t)(v >> 8);
        *(a + 2) = (uint8_t)v;
#else
        *(uint16_t *)a = (uint16_t)v;
        *(a + 2) = (uint8_t)(v >> 16);
#endif
    }
}

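/*
 * Porter-Duff OVER for premultiplied ARGB:
 * dest = src + (1 - alpha (src)) * dest, applied to all four channels.
 */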
static force_inline uint32_t
over (uint32_t src,
      uint32_t dest)
{
    uint32_t a = ~src >> 24;

    UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);

    return dest;
}

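/* IN: scale all four channels of x by the 8-bit mask value y. */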
static force_inline uint32_t
in (uint32_t x,
    uint8_t  y)
{
    uint16_t a = y;

    UN8x4_MUL_UN8 (x, a);

    return x;
}

/*
 * Naming convention:
 *
 * op_src_mask_dest
 */
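/*
 * For example, fast_composite_over_n_8_0565 implements the OVER operator
 * with a solid ("n") source, an a8 mask and an r5g6b5/b5g6r5 destination.
 */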
static void
fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
                                 pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t *src, *src_line;
    uint32_t *dst, *dst_line;
    uint8_t *mask, *mask_line;
    int src_stride, mask_stride, dst_stride;
    uint8_t m;
    uint32_t s, d;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
        src = src_line;
        src_line += src_stride;
        dst = dst_line;
        dst_line += dst_stride;
        mask = mask_line;
        mask_line += mask_stride;

        w = width;
        while (w--)
        {
            m = *mask++;
            if (m)
            {
                s = *src | 0xff000000;

                if (m == 0xff)
                {
                    *dst = s;
                }
                else
                {
                    d = in (s, m);
                    *dst = over (d, *dst);
                }
            }
            src++;
            dst++;
        }
    }
}

static void
fast_composite_in_n_8_8 (pixman_implementation_t *imp,
                         pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint8_t *dst_line, *dst;
    uint8_t *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;
    uint16_t t;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    if (srca == 0xff)
    {
        while (height--)
        {
            dst = dst_line;
            dst_line += dst_stride;
            mask = mask_line;
            mask_line += mask_stride;
            w = width;

            while (w--)
            {
                m = *mask++;

                if (m == 0)
                    *dst = 0;
                else if (m != 0xff)
                    *dst = MUL_UN8 (m, *dst, t);

                dst++;
            }
        }
    }
    else
    {
        while (height--)
        {
            dst = dst_line;
            dst_line += dst_stride;
            mask = mask_line;
            mask_line += mask_stride;
            w = width;

            while (w--)
            {
                m = *mask++;
                m = MUL_UN8 (m, srca, t);

                if (m == 0)
                    *dst = 0;
                else if (m != 0xff)
                    *dst = MUL_UN8 (m, *dst, t);

                dst++;
            }
        }
    }
}

static void
fast_composite_in_8_8 (pixman_implementation_t *imp,
                       pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t *dst_line, *dst;
    uint8_t *src_line, *src;
    int dst_stride, src_stride;
    int32_t w;
    uint8_t s;
    uint16_t t;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        src = src_line;
        src_line += src_stride;
        w = width;

        while (w--)
        {
            s = *src++;

            if (s == 0)
                *dst = 0;
            else if (s != 0xff)
                *dst = MUL_UN8 (s, *dst, t);

            dst++;
        }
    }
}

static void
fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint32_t *dst_line, *dst, d;
    uint8_t *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
        return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        mask = mask_line;
        mask_line += mask_stride;
        w = width;

        while (w--)
        {
            m = *mask++;
            if (m == 0xff)
            {
                if (srca == 0xff)
                    *dst = src;
                else
                    *dst = over (src, *dst);
            }
            else if (m)
            {
                d = in (src, m);
                *dst = over (d, *dst);
            }
            dst++;
        }
    }
}

static void
fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
                                   pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, s;
    uint32_t *dst_line, *dst, d;
    uint32_t *mask_line, *mask, ma;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    if (src == 0)
        return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        mask = mask_line;
        mask_line += mask_stride;
        w = width;

        while (w--)
        {
            ma = *mask++;

            if (ma)
            {
                d = *dst;
                s = src;

                UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);

                *dst = s;
            }

            dst++;
        }
    }
}

static void
fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
                                    pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca, s;
    uint32_t *dst_line, *dst, d;
    uint32_t *mask_line, *mask, ma;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
        return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        mask = mask_line;
        mask_line += mask_stride;
        w = width;

        while (w--)
        {
            ma = *mask++;
            if (ma == 0xffffffff)
            {
                if (srca == 0xff)
                    *dst = src;
                else
                    *dst = over (src, *dst);
            }
            else if (ma)
            {
                d = *dst;
                s = src;

                UN8x4_MUL_UN8x4 (s, ma);
                UN8x4_MUL_UN8 (ma, srca);
                ma = ~ma;
                UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);

                *dst = d;
            }

            dst++;
        }
    }
}

static void
fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint8_t *dst_line, *dst;
    uint32_t d;
    uint8_t *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
        return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        mask = mask_line;
        mask_line += mask_stride;
        w = width;

        while (w--)
        {
            m = *mask++;
            if (m == 0xff)
            {
                if (srca == 0xff)
                {
                    d = src;
                }
                else
                {
                    d = fetch_24 (dst);
                    d = over (src, d);
                }
                store_24 (dst, d);
            }
            else if (m)
            {
                d = over (in (src, m), fetch_24 (dst));
                store_24 (dst, d);
            }
            dst += 3;
        }
    }
}

static void
fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint16_t *dst_line, *dst;
    uint32_t d;
    uint8_t *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
        return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        mask = mask_line;
        mask_line += mask_stride;
        w = width;

        while (w--)
        {
            m = *mask++;
            if (m == 0xff)
            {
                if (srca == 0xff)
                {
                    d = src;
                }
                else
                {
                    d = *dst;
                    d = over (src, convert_0565_to_0888 (d));
                }
                *dst = convert_8888_to_0565 (d);
            }
            else if (m)
            {
                d = *dst;
                d = over (in (src, m), convert_0565_to_0888 (d));
                *dst = convert_8888_to_0565 (d);
            }
            dst++;
        }
    }
}

static void
fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
                                    pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca, s;
    uint16_t src16;
    uint16_t *dst_line, *dst;
    uint32_t d;
    uint32_t *mask_line, *mask, ma;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
        return;

    src16 = convert_8888_to_0565 (src);

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        mask = mask_line;
        mask_line += mask_stride;
        w = width;

        while (w--)
        {
            ma = *mask++;
            if (ma == 0xffffffff)
            {
                if (srca == 0xff)
                {
                    *dst = src16;
                }
                else
                {
                    d = *dst;
                    d = over (src, convert_0565_to_0888 (d));
                    *dst = convert_8888_to_0565 (d);
                }
            }
            else if (ma)
            {
                d = *dst;
                d = convert_0565_to_0888 (d);

                s = src;

                UN8x4_MUL_UN8x4 (s, ma);
                UN8x4_MUL_UN8 (ma, srca);
                ma = ~ma;
                UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);

                *dst = convert_8888_to_0565 (d);
            }
            dst++;
        }
    }
}

static void
fast_composite_over_8888_8888 (pixman_implementation_t *imp,
                               pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t *dst_line, *dst;
    uint32_t *src_line, *src, s;
    int dst_stride, src_stride;
    uint8_t a;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        src = src_line;
        src_line += src_stride;
        w = width;

        while (w--)
        {
            s = *src++;
            a = s >> 24;
            if (a == 0xff)
                *dst = s;
            else if (s)
                *dst = over (s, *dst);
            dst++;
        }
    }
}

static void
fast_composite_src_x888_8888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t *dst_line, *dst;
    uint32_t *src_line, *src;
    int dst_stride, src_stride;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        src = src_line;
        src_line += src_stride;
        w = width;

        while (w--)
            *dst++ = (*src++) | 0xff000000;
    }
}

#if 0
static void
fast_composite_over_8888_0888 (pixman_implementation_t *imp,
                               pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t *dst_line, *dst;
    uint32_t d;
    uint32_t *src_line, *src, s;
    uint8_t a;
    int dst_stride, src_stride;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        src = src_line;
        src_line += src_stride;
        w = width;

        while (w--)
        {
            s = *src++;
            a = s >> 24;
            if (a)
            {
                if (a == 0xff)
                    d = s;
                else
                    d = over (s, fetch_24 (dst));

                store_24 (dst, d);
            }
            dst += 3;
        }
    }
}
#endif

static void
fast_composite_over_8888_0565 (pixman_implementation_t *imp,
                               pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint16_t *dst_line, *dst;
    uint32_t d;
    uint32_t *src_line, *src, s;
    uint8_t a;
    int dst_stride, src_stride;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        src = src_line;
        src_line += src_stride;
        w = width;

        while (w--)
        {
            s = *src++;
            a = s >> 24;
            if (s)
            {
                if (a == 0xff)
                {
                    d = s;
                }
                else
                {
                    d = *dst;
                    d = over (s, convert_0565_to_0888 (d));
                }
                *dst = convert_8888_to_0565 (d);
            }
            dst++;
        }
    }
}

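/*
 * The a8 ADD path below uses a branch-free saturating add: t = d + s is
 * computed in 16 bits, and (0 - (t >> 8)) becomes all ones exactly when
 * the sum overflowed 8 bits, clamping the result to 0xff.
 */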
static void
fast_composite_add_8_8 (pixman_implementation_t *imp,
                        pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t *dst_line, *dst;
    uint8_t *src_line, *src;
    int dst_stride, src_stride;
    int32_t w;
    uint8_t s, d;
    uint16_t t;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        src = src_line;
        src_line += src_stride;
        w = width;

        while (w--)
        {
            s = *src++;
            if (s)
            {
                if (s != 0xff)
                {
                    d = *dst;
                    t = d + s;
                    s = t | (0 - (t >> 8));
                }
                *dst = s;
            }
            dst++;
        }
    }
}

static void
fast_composite_add_0565_0565 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint16_t *dst_line, *dst;
    uint32_t d;
    uint16_t *src_line, *src;
    uint32_t s;
    int dst_stride, src_stride;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        src = src_line;
        src_line += src_stride;
        w = width;

        while (w--)
        {
            s = *src++;
            if (s)
            {
                d = *dst;
                s = convert_0565_to_8888 (s);
                if (d)
                {
                    d = convert_0565_to_8888 (d);
                    UN8x4_ADD_UN8x4 (s, d);
                }
                *dst = convert_8888_to_0565 (s);
            }
            dst++;
        }
    }
}

static void
fast_composite_add_8888_8888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t *dst_line, *dst;
    uint32_t *src_line, *src;
    int dst_stride, src_stride;
    int32_t w;
    uint32_t s, d;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        src = src_line;
        src_line += src_stride;
        w = width;

        while (w--)
        {
            s = *src++;
            if (s)
            {
                if (s != 0xffffffff)
                {
                    d = *dst;
                    if (d)
                        UN8x4_ADD_UN8x4 (s, d);
                }
                *dst = s;
            }
            dst++;
        }
    }
}

static void
fast_composite_add_n_8_8 (pixman_implementation_t *imp,
                          pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t *dst_line, *dst;
    uint8_t *mask_line, *mask;
    int dst_stride, mask_stride;
    int32_t w;
    uint32_t src;
    uint8_t sa;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    sa = (src >> 24);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        mask = mask_line;
        mask_line += mask_stride;
        w = width;

        while (w--)
        {
            uint16_t tmp;
            uint16_t a;
            uint32_t m, d;
            uint32_t r;

            a = *mask++;
            d = *dst;

            m = MUL_UN8 (sa, a, tmp);
            r = ADD_UN8 (m, d, tmp);

            *dst++ = r;
        }
    }
}

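/*
 * a1 masks are stored 32 samples per uint32_t. Sample 0 of a word is the
 * least significant bit on little-endian but the most significant bit on
 * big-endian, so the bitmask is created and advanced differently.
 */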
#ifdef WORDS_BIGENDIAN
#define CREATE_BITMASK(n) (0x80000000 >> (n))
#define UPDATE_BITMASK(n) ((n) >> 1)
#else
#define CREATE_BITMASK(n) (1 << (n))
#define UPDATE_BITMASK(n) ((n) << 1)
#endif

#define TEST_BIT(p, n) \
    (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
#define SET_BIT(p, n) \
    do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0)

static void
fast_composite_add_1_1 (pixman_implementation_t *imp,
                        pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t *dst_line, *dst;
    uint32_t *src_line, *src;
    int dst_stride, src_stride;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
                           src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t,
                           dst_stride, dst_line, 1);

    while (height--)
    {
        dst = dst_line;
        dst_line += dst_stride;
        src = src_line;
        src_line += src_stride;
        w = width;

        while (w--)
        {
            /*
             * TODO: improve performance by processing uint32_t data instead
             * of individual bits
             */
            if (TEST_BIT (src, src_x + w))
                SET_BIT (dst, dest_x + w);
        }
    }
}

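/*
 * The a1-masked OVER paths below stream the mask one 32-bit word at a
 * time: "bitcache" holds the current word and "bitmask" walks across it,
 * reloading the next word whenever the mask bit shifts out to zero.
 */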
static void
fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint32_t *dst, *dst_line;
    uint32_t *mask, *mask_line;
    int mask_stride, dst_stride;
    uint32_t bitcache, bitmask;
    int32_t w;

    if (width <= 0)
        return;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    srca = src >> 24;
    if (src == 0)
        return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t,
                           dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
                           mask_stride, mask_line, 1);
    mask_line += mask_x >> 5;

    if (srca == 0xff)
    {
        while (height--)
        {
            dst = dst_line;
            dst_line += dst_stride;
            mask = mask_line;
            mask_line += mask_stride;
            w = width;

            bitcache = *mask++;
            bitmask = CREATE_BITMASK (mask_x & 31);

            while (w--)
            {
                if (bitmask == 0)
                {
                    bitcache = *mask++;
                    bitmask = CREATE_BITMASK (0);
                }
                if (bitcache & bitmask)
                    *dst = src;
                bitmask = UPDATE_BITMASK (bitmask);
                dst++;
            }
        }
    }
    else
    {
        while (height--)
        {
            dst = dst_line;
            dst_line += dst_stride;
            mask = mask_line;
            mask_line += mask_stride;
            w = width;

            bitcache = *mask++;
            bitmask = CREATE_BITMASK (mask_x & 31);

            while (w--)
            {
                if (bitmask == 0)
                {
                    bitcache = *mask++;
                    bitmask = CREATE_BITMASK (0);
                }
                if (bitcache & bitmask)
                    *dst = over (src, *dst);
                bitmask = UPDATE_BITMASK (bitmask);
                dst++;
            }
        }
    }
}

static void
fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint16_t *dst, *dst_line;
    uint32_t *mask, *mask_line;
    int mask_stride, dst_stride;
    uint32_t bitcache, bitmask;
    int32_t w;
    uint32_t d;
    uint16_t src565;

    if (width <= 0)
        return;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    srca = src >> 24;
    if (src == 0)
        return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t,
                           dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
                           mask_stride, mask_line, 1);
    mask_line += mask_x >> 5;

    if (srca == 0xff)
    {
        src565 = convert_8888_to_0565 (src);
        while (height--)
        {
            dst = dst_line;
            dst_line += dst_stride;
            mask = mask_line;
            mask_line += mask_stride;
            w = width;

            bitcache = *mask++;
            bitmask = CREATE_BITMASK (mask_x & 31);

            while (w--)
            {
                if (bitmask == 0)
                {
                    bitcache = *mask++;
                    bitmask = CREATE_BITMASK (0);
                }
                if (bitcache & bitmask)
                    *dst = src565;
                bitmask = UPDATE_BITMASK (bitmask);
                dst++;
            }
        }
    }
    else
    {
        while (height--)
        {
            dst = dst_line;
            dst_line += dst_stride;
            mask = mask_line;
            mask_line += mask_stride;
            w = width;

            bitcache = *mask++;
            bitmask = CREATE_BITMASK (mask_x & 31);

            while (w--)
            {
                if (bitmask == 0)
                {
                    bitcache = *mask++;
                    bitmask = CREATE_BITMASK (0);
                }
                if (bitcache & bitmask)
                {
                    d = over (src, convert_0565_to_0888 (*dst));
                    *dst = convert_8888_to_0565 (d);
                }
                bitmask = UPDATE_BITMASK (bitmask);
                dst++;
            }
        }
    }
}

/*
 * Simple bitblt
 */

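/*
 * SRC with a solid source reduces to a fill: convert the 8888 color to
 * the destination format once, then delegate to pixman_fill().
 */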
static void
fast_composite_solid_fill (pixman_implementation_t *imp,
                           pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    if (dest_image->bits.format == PIXMAN_a1)
    {
        src = src >> 31;
    }
    else if (dest_image->bits.format == PIXMAN_a8)
    {
        src = src >> 24;
    }
    else if (dest_image->bits.format == PIXMAN_r5g6b5 ||
             dest_image->bits.format == PIXMAN_b5g6r5)
    {
        src = convert_8888_to_0565 (src);
    }

    pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
                 PIXMAN_FORMAT_BPP (dest_image->bits.format),
                 dest_x, dest_y,
                 width, height,
                 src);
}

static void
fast_composite_src_memcpy (pixman_implementation_t *imp,
                           pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8;
    uint32_t n_bytes = width * bpp;
    int dst_stride, src_stride;
    uint8_t *dst;
    uint8_t *src;

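    /* bits.rowstride is measured in uint32_t units; convert it to bytes. */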
    src_stride = src_image->bits.rowstride * 4;
    dst_stride = dest_image->bits.rowstride * 4;

    src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
    dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp;

    while (height--)
    {
        memcpy (dst, src, n_bytes);

        dst += dst_stride;
        src += src_stride;
    }
}

FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER)
FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD)
FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL)
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)

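/*
 * The bilinear scanline workers below receive two adjacent source rows
 * (src_top/src_bottom) with fixed vertical weights wt/wb, and walk
 * horizontally in 16.16 fixed point: vx advances by unit_x per pixel.
 */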
static force_inline void
scaled_bilinear_scanline_8888_565_OVER (uint16_t *       dst,
                                        const uint32_t * mask,
                                        const uint32_t * src_top,
                                        const uint32_t * src_bottom,
                                        int32_t          w,
                                        int              wt,
                                        int              wb,
                                        pixman_fixed_t   vx,
                                        pixman_fixed_t   unit_x,
                                        pixman_fixed_t   max_vx,
                                        pixman_bool_t    zero_src)
{
    while ((w -= 1) >= 0)
    {
        uint32_t tl = src_top [pixman_fixed_to_int (vx)];
        uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1];
        uint32_t bl = src_bottom [pixman_fixed_to_int (vx)];
        uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
        uint32_t src, result;
        uint16_t d;
        d = *dst;
        src = bilinear_interpolation (tl, tr,
                                      bl, br,
                                      pixman_fixed_to_bilinear_weight(vx),
                                      wb);
        vx += unit_x;
        result = over (src, convert_0565_to_0888 (d));
        *dst++ = convert_8888_to_0565 (result);
    }
}

static force_inline void
scaled_bilinear_scanline_8888_8888_OVER (uint32_t *       dst,
                                         const uint32_t * mask,
                                         const uint32_t * src_top,
                                         const uint32_t * src_bottom,
                                         int32_t          w,
                                         int              wt,
                                         int              wb,
                                         pixman_fixed_t   vx,
                                         pixman_fixed_t   unit_x,
                                         pixman_fixed_t   max_vx,
                                         pixman_bool_t    zero_src)
{
    while ((w -= 1) >= 0)
    {
        uint32_t tl = src_top [pixman_fixed_to_int (vx)];
        uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1];
        uint32_t bl = src_bottom [pixman_fixed_to_int (vx)];
        uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
        uint32_t src;
        uint32_t d;
        uint32_t result;
        d = *dst;
        src = bilinear_interpolation (tl, tr,
                                      bl, br,
                                      pixman_fixed_to_bilinear_weight(vx),
                                      wb);
        vx += unit_x;
        *dst++ = over (src, d);
    }
}

#ifndef LOWER_QUALITY_INTERPOLATION

static force_inline void
scaled_bilinear_scanline_565_565_SRC (uint16_t *       dst,
                                      const uint32_t * mask,
                                      const uint16_t * src_top,
                                      const uint16_t * src_bottom,
                                      int32_t          w,
                                      int              wt,
                                      int              wb,
                                      pixman_fixed_t   vx,
                                      pixman_fixed_t   unit_x,
                                      pixman_fixed_t   max_vx,
                                      pixman_bool_t    zero_src)
{
    while ((w -= 1) >= 0)
    {
        uint16_t tl = src_top [pixman_fixed_to_int (vx)];
        uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1];
        uint16_t bl = src_bottom [pixman_fixed_to_int (vx)];
        uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
        uint32_t d;
        d = bilinear_interpolation(convert_0565_to_8888 (tl),
                                   convert_0565_to_8888 (tr),
                                   convert_0565_to_8888 (bl),
                                   convert_0565_to_8888 (br),
                                   pixman_fixed_to_bilinear_weight (vx),
                                   wb);
        vx += unit_x;
        *dst++ = convert_8888_to_0565 (d);
    }
}

#else

/* This is a clever low resolution bilinear interpolation inspired by the code
   in Skia */

/* This takes the green component from the 565 representation and moves it:

   00000000 00000000 rrrrrggg gggbbbbb

   00000ggg ggg00000 rrrrr000 000bbbbb

   This gives us 5 extra bits of space before each component to let us do
   SWAR style optimizations
*/

#define GREEN_MASK (((1 << 6) - 1) << 5)

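/*
 * Example: white, 0xffff, expands to 0x07e0f81f and compacts back to
 * 0xffff; in the expanded form each component has zero bits above it
 * to absorb the carries produced by the weighted sums below.
 */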
static inline uint32_t
expand_rgb_565 (uint16_t c) {
    return ((c & GREEN_MASK) << 16) | (c & ~GREEN_MASK);
}

static inline uint16_t
compact_rgb_565 (uint32_t c) {
    return ((c >> 16) & GREEN_MASK) | (c & ~GREEN_MASK);
}

static inline uint16_t
bilinear_interpolation_565(uint16_t tl, uint16_t tr,
                           uint16_t bl, uint16_t br,
                           int x, int y)
{
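    /*
     * The four weights (32 - 2*x - 2*y + xy), (2*x - xy), (2*y - xy)
     * and xy always sum to 32, so the final >> 5 renormalizes.
     */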
    int xy;
    uint32_t a00 = expand_rgb_565 (tl);
    uint32_t a01 = expand_rgb_565 (tr);
    uint32_t a10 = expand_rgb_565 (bl);
    uint32_t a11 = expand_rgb_565 (br);

    xy = (x * y) >> 3;
    return compact_rgb_565 ((a00 * (32 - 2*y - 2*x + xy) +
                             a01 * (2*x - xy) +
                             a10 * (2*y - xy) +
                             a11 * xy) >> 5);
}

static force_inline void
scaled_bilinear_scanline_565_565_SRC (uint16_t *       dst,
                                      const uint32_t * mask,
                                      const uint16_t * src_top,
                                      const uint16_t * src_bottom,
                                      int32_t          w,
                                      int              wt,
                                      int              wb,
                                      pixman_fixed_t   vx,
                                      pixman_fixed_t   unit_x,
                                      pixman_fixed_t   max_vx,
                                      pixman_bool_t    zero_src)
{
    while ((w -= 1) >= 0)
    {
        uint16_t tl = src_top [pixman_fixed_to_int (vx)];
        uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1];
        uint16_t bl = src_bottom [pixman_fixed_to_int (vx)];
        uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1];

        uint16_t d = bilinear_interpolation_565 (tl, tr, bl, br,
                                                 pixman_fixed_to_bilinear_weight(vx),
                                                 wb);
        vx += unit_x;
        *dst++ = d;
    }
}

#endif

FAST_BILINEAR_MAINLOOP_COMMON (565_565_cover_SRC,
                               scaled_bilinear_scanline_565_565_SRC, NULL,
                               uint16_t, uint32_t, uint16_t,
                               COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (565_565_pad_SRC,
                               scaled_bilinear_scanline_565_565_SRC, NULL,
                               uint16_t, uint32_t, uint16_t,
                               PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (565_565_none_SRC,
                               scaled_bilinear_scanline_565_565_SRC, NULL,
                               uint16_t, uint32_t, uint16_t,
                               NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (565_565_normal_SRC,
                               scaled_bilinear_scanline_565_565_SRC, NULL,
                               uint16_t, uint32_t, uint16_t,
                               NORMAL, FLAG_NONE)

FAST_BILINEAR_MAINLOOP_COMMON (8888_565_cover_OVER,
                               scaled_bilinear_scanline_8888_565_OVER, NULL,
                               uint32_t, uint32_t, uint16_t,
                               COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (8888_565_pad_OVER,
                               scaled_bilinear_scanline_8888_565_OVER, NULL,
                               uint32_t, uint32_t, uint16_t,
                               PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (8888_565_none_OVER,
                               scaled_bilinear_scanline_8888_565_OVER, NULL,
                               uint32_t, uint32_t, uint16_t,
                               NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (8888_565_normal_OVER,
                               scaled_bilinear_scanline_8888_565_OVER, NULL,
                               uint32_t, uint32_t, uint16_t,
                               NORMAL, FLAG_NONE)

FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_cover_OVER,
                               scaled_bilinear_scanline_8888_8888_OVER, NULL,
                               uint32_t, uint32_t, uint32_t,
                               COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_pad_OVER,
                               scaled_bilinear_scanline_8888_8888_OVER, NULL,
                               uint32_t, uint32_t, uint32_t,
                               PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_none_OVER,
                               scaled_bilinear_scanline_8888_8888_OVER, NULL,
                               uint32_t, uint32_t, uint32_t,
                               NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_normal_OVER,
                               scaled_bilinear_scanline_8888_8888_OVER, NULL,
                               uint32_t, uint32_t, uint32_t,
                               NORMAL, FLAG_NONE)

#define REPEAT_MIN_WIDTH 32

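/*
 * Tiled (NORMAL) repeat: the repeat is unrolled into a sequence of
 * non-repeating composite calls, one horizontal tile of one scanline at a
 * time. Sources narrower than REPEAT_MIN_WIDTH are first replicated into
 * a temporary one-scanline image so each call covers enough pixels to
 * amortize the per-call overhead.
 */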
static void
fast_composite_tiled_repeat (pixman_implementation_t *imp,
                             pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    pixman_composite_func_t func;
    pixman_format_code_t mask_format;
    uint32_t src_flags, mask_flags;
    int32_t sx, sy;
    int32_t width_remain;
    int32_t num_pixels;
    int32_t src_width;
    int32_t i, j;
    pixman_image_t extended_src_image;
    uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
    pixman_bool_t need_src_extension;
    uint32_t *src_line;
    int32_t src_stride;
    int32_t src_bpp;
    pixman_composite_info_t info2 = *info;

    src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |
                    FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;

    if (mask_image)
    {
        mask_format = mask_image->common.extended_format_code;
        mask_flags = info->mask_flags;
    }
    else
    {
        mask_format = PIXMAN_null;
        mask_flags = FAST_PATH_IS_OPAQUE;
    }

    _pixman_implementation_lookup_composite (
        imp->toplevel, info->op,
        src_image->common.extended_format_code, src_flags,
        mask_format, mask_flags,
        dest_image->common.extended_format_code, info->dest_flags,
        &imp, &func);

    src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);

    if (src_image->bits.width < REPEAT_MIN_WIDTH &&
        (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) &&
        !src_image->bits.indexed)
    {
        sx = src_x;
        sx = MOD (sx, src_image->bits.width);
        sx += width;
        src_width = 0;

        while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
            src_width += src_image->bits.width;

        src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);

        /* Initialize/validate stack-allocated temporary image */
        _pixman_bits_image_init (&extended_src_image, src_image->bits.format,
                                 src_width, 1, &extended_src[0], src_stride,
                                 FALSE);
        _pixman_image_validate (&extended_src_image);

        info2.src_image = &extended_src_image;
        need_src_extension = TRUE;
    }
    else
    {
        src_width = src_image->bits.width;
        need_src_extension = FALSE;
    }

    sx = src_x;
    sy = src_y;

    while (--height >= 0)
    {
        sx = MOD (sx, src_width);
        sy = MOD (sy, src_image->bits.height);

        if (need_src_extension)
        {
            if (src_bpp == 32)
            {
                PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);

                for (i = 0; i < src_width; )
                {
                    for (j = 0; j < src_image->bits.width; j++, i++)
                        extended_src[i] = src_line[j];
                }
            }
            else if (src_bpp == 16)
            {
                uint16_t *src_line_16;

                PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
                                       src_line_16, 1);
                src_line = (uint32_t*)src_line_16;

                for (i = 0; i < src_width; )
                {
                    for (j = 0; j < src_image->bits.width; j++, i++)
                        ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
                }
            }
            else if (src_bpp == 8)
            {
                uint8_t *src_line_8;

                PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
                                       src_line_8, 1);
                src_line = (uint32_t*)src_line_8;

                for (i = 0; i < src_width; )
                {
                    for (j = 0; j < src_image->bits.width; j++, i++)
                        ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
                }
            }

            info2.src_y = 0;
        }
        else
        {
            info2.src_y = sy;
        }

        width_remain = width;

        while (width_remain > 0)
        {
            num_pixels = src_width - sx;

            if (num_pixels > width_remain)
                num_pixels = width_remain;

            info2.src_x = sx;
            info2.width = num_pixels;
            info2.height = 1;

            func (imp, &info2);

            width_remain -= num_pixels;
            info2.mask_x += num_pixels;
            info2.dest_x += num_pixels;
            sx = 0;
        }

        sx = src_x;
        sy++;
        info2.mask_x = info->mask_x;
        info2.mask_y++;
        info2.dest_x = info->dest_x;
        info2.dest_y++;
    }

    if (need_src_extension)
        _pixman_image_fini (&extended_src_image);
}

/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
static force_inline void
scaled_nearest_scanline_565_565_SRC (uint16_t *       dst,
                                     const uint16_t * src,
                                     int32_t          w,
                                     pixman_fixed_t   vx,
                                     pixman_fixed_t   unit_x,
                                     pixman_fixed_t   max_vx,
                                     pixman_bool_t    fully_transparent_src)
{
    uint16_t tmp1, tmp2, tmp3, tmp4;
    while ((w -= 4) >= 0)
    {
        tmp1 = *(src + pixman_fixed_to_int (vx));
        vx += unit_x;
        tmp2 = *(src + pixman_fixed_to_int (vx));
        vx += unit_x;
        tmp3 = *(src + pixman_fixed_to_int (vx));
        vx += unit_x;
        tmp4 = *(src + pixman_fixed_to_int (vx));
        vx += unit_x;
        *dst++ = tmp1;
        *dst++ = tmp2;
        *dst++ = tmp3;
        *dst++ = tmp4;
    }
    if (w & 2)
    {
        tmp1 = *(src + pixman_fixed_to_int (vx));
        vx += unit_x;
        tmp2 = *(src + pixman_fixed_to_int (vx));
        vx += unit_x;
        *dst++ = tmp1;
        *dst++ = tmp2;
    }
    if (w & 1)
        *dst = *(src + pixman_fixed_to_int (vx));
}

FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
                       scaled_nearest_scanline_565_565_SRC,
                       uint16_t, uint16_t, COVER)
FAST_NEAREST_MAINLOOP (565_565_none_SRC,
                       scaled_nearest_scanline_565_565_SRC,
                       uint16_t, uint16_t, NONE)
FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
                       scaled_nearest_scanline_565_565_SRC,
                       uint16_t, uint16_t, PAD)

static force_inline uint32_t
fetch_nearest (pixman_repeat_t src_repeat,
               pixman_format_code_t format,
               uint32_t *src, int x, int src_width)
{
    if (repeat (src_repeat, &x, src_width))
    {
        if (format == PIXMAN_x8r8g8b8 || format == PIXMAN_x8b8g8r8)
            return *(src + x) | 0xff000000;
        else
            return *(src + x);
    }
    else
    {
        return 0;
    }
}

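/*
 * combine_over applies dest = src + (1 - alpha (src)) * dest with two
 * early outs: fully transparent sources leave the destination untouched
 * and fully opaque sources overwrite it directly.
 */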
static force_inline void
combine_over (uint32_t s, uint32_t *dst)
{
    if (s)
    {
        uint8_t ia = 0xff - (s >> 24);

        if (ia)
            UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
        else
            *dst = s;
    }
}

static force_inline void
combine_src (uint32_t s, uint32_t *dst)
{
    *dst = s;
}

static void
fast_composite_scaled_nearest (pixman_implementation_t *imp,
                               pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t *dst_line;
    uint32_t *src_line;
    int dst_stride, src_stride;
    int src_width, src_height;
    pixman_repeat_t src_repeat;
    pixman_fixed_t unit_x, unit_y;
    pixman_format_code_t src_format;
    pixman_vector_t v;
    pixman_fixed_t vy;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
     * transformed from destination space to source space
     */
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);

    /* reference point is the center of the pixel */
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
    v.vector[2] = pixman_fixed_1;

    if (!pixman_transform_point_3d (src_image->common.transform, &v))
        return;

    unit_x = src_image->common.transform->matrix[0][0];
    unit_y = src_image->common.transform->matrix[1][1];

    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
    v.vector[0] -= pixman_fixed_e;
    v.vector[1] -= pixman_fixed_e;

    src_height = src_image->bits.height;
    src_width = src_image->bits.width;
    src_repeat = src_image->common.repeat;
    src_format = src_image->bits.format;

    vy = v.vector[1];
    while (height--)
    {
        pixman_fixed_t vx = v.vector[0];
        int y = pixman_fixed_to_int (vy);
        uint32_t *dst = dst_line;

        dst_line += dst_stride;

        /* adjust the y location by a unit vector in the y direction
         * this is equivalent to transforming y+1 of the destination point to source space */
        vy += unit_y;

        if (!repeat (src_repeat, &y, src_height))
        {
            if (op == PIXMAN_OP_SRC)
                memset (dst, 0, sizeof (*dst) * width);
        }
        else
        {
            int w = width;

            uint32_t *src = src_line + y * src_stride;

            while (w >= 2)
            {
                uint32_t s1, s2;
                int x1, x2;

                x1 = pixman_fixed_to_int (vx);
                vx += unit_x;

                x2 = pixman_fixed_to_int (vx);
                vx += unit_x;

                w -= 2;

                s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
                s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);

                if (op == PIXMAN_OP_OVER)
                {
                    combine_over (s1, dst++);
                    combine_over (s2, dst++);
                }
                else
                {
                    combine_src (s1, dst++);
                    combine_src (s2, dst++);
                }
            }

            while (w--)
            {
                uint32_t s;
                int x;

                x = pixman_fixed_to_int (vx);
                vx += unit_x;

                s = fetch_nearest (src_repeat, src_format, src, x, src_width);

                if (op == PIXMAN_OP_OVER)
                    combine_over (s, dst++);
                else
                    combine_src (s, dst++);
            }
        }
    }
}

#define CACHE_LINE_SIZE 64

#define FAST_SIMPLE_ROTATE(suffix, pix_type) \
 \
static void \
blt_rotated_90_trivial_##suffix (pix_type *dst, \
                                 int dst_stride, \
                                 const pix_type *src, \
                                 int src_stride, \
                                 int w, \
                                 int h) \
{ \
    int x, y; \
    for (y = 0; y < h; y++) \
    { \
        const pix_type *s = src + (h - y - 1); \
        pix_type *d = dst + dst_stride * y; \
        for (x = 0; x < w; x++) \
        { \
            *d++ = *s; \
            s += src_stride; \
        } \
    } \
} \
 \
static void \
blt_rotated_270_trivial_##suffix (pix_type *dst, \
                                  int dst_stride, \
                                  const pix_type *src, \
                                  int src_stride, \
                                  int w, \
                                  int h) \
{ \
    int x, y; \
    for (y = 0; y < h; y++) \
    { \
        const pix_type *s = src + src_stride * (w - 1) + y; \
        pix_type *d = dst + dst_stride * y; \
        for (x = 0; x < w; x++) \
        { \
            *d++ = *s; \
            s -= src_stride; \
        } \
    } \
} \
 \
static void \
blt_rotated_90_##suffix (pix_type *dst, \
                         int dst_stride, \
                         const pix_type *src, \
                         int src_stride, \
                         int W, \
                         int H) \
{ \
    int x; \
    int leading_pixels = 0, trailing_pixels = 0; \
    const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \
 \
    /* \
     * split processing into handling destination as TILE_SIZExH cache line \
     * aligned vertical stripes (optimistically assuming that destination \
     * stride is a multiple of cache line, if not - it will be just a bit \
     * slower) \
     */ \
 \
    if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \
    { \
        leading_pixels = TILE_SIZE - (((uintptr_t)dst & \
                            (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
        if (leading_pixels > W) \
            leading_pixels = W; \
 \
        /* unaligned leading part NxH (where N < TILE_SIZE) */ \
        blt_rotated_90_trivial_##suffix ( \
            dst, \
            dst_stride, \
            src, \
            src_stride, \
            leading_pixels, \
            H); \
 \
        dst += leading_pixels; \
        src += leading_pixels * src_stride; \
        W -= leading_pixels; \
    } \
 \
    if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \
    { \
        trailing_pixels = (((uintptr_t)(dst + W) & \
                            (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
        if (trailing_pixels > W) \
            trailing_pixels = W; \
        W -= trailing_pixels; \
    } \
 \
    for (x = 0; x < W; x += TILE_SIZE) \
    { \
        /* aligned middle part TILE_SIZExH */ \
        blt_rotated_90_trivial_##suffix ( \
            dst + x, \
            dst_stride, \
            src + src_stride * x, \
            src_stride, \
            TILE_SIZE, \
            H); \
    } \
 \
    if (trailing_pixels) \
    { \
        /* unaligned trailing part NxH (where N < TILE_SIZE) */ \
        blt_rotated_90_trivial_##suffix ( \
            dst + W, \
            dst_stride, \
            src + W * src_stride, \
            src_stride, \
            trailing_pixels, \
            H); \
    } \
} \
 \
static void \
blt_rotated_270_##suffix (pix_type *dst, \
                          int dst_stride, \
                          const pix_type *src, \
                          int src_stride, \
                          int W, \
                          int H) \
{ \
    int x; \
    int leading_pixels = 0, trailing_pixels = 0; \
    const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \
 \
    /* \
     * split processing into handling destination as TILE_SIZExH cache line \
     * aligned vertical stripes (optimistically assuming that destination \
     * stride is a multiple of cache line, if not - it will be just a bit \
     * slower) \
     */ \
 \
    if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \
    { \
        leading_pixels = TILE_SIZE - (((uintptr_t)dst & \
                            (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
        if (leading_pixels > W) \
            leading_pixels = W; \
 \
        /* unaligned leading part NxH (where N < TILE_SIZE) */ \
        blt_rotated_270_trivial_##suffix ( \
            dst, \
            dst_stride, \
            src + src_stride * (W - leading_pixels), \
            src_stride, \
            leading_pixels, \
            H); \
 \
        dst += leading_pixels; \
        W -= leading_pixels; \
    } \
 \
    if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \
    { \
        trailing_pixels = (((uintptr_t)(dst + W) & \
                            (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
        if (trailing_pixels > W) \
            trailing_pixels = W; \
        W -= trailing_pixels; \
        src += trailing_pixels * src_stride; \
    } \
 \
    for (x = 0; x < W; x += TILE_SIZE) \
    { \
        /* aligned middle part TILE_SIZExH */ \
        blt_rotated_270_trivial_##suffix ( \
            dst + x, \
            dst_stride, \
            src + src_stride * (W - x - TILE_SIZE), \
            src_stride, \
            TILE_SIZE, \
            H); \
    } \
 \
    if (trailing_pixels) \
    { \
        /* unaligned trailing part NxH (where N < TILE_SIZE) */ \
        blt_rotated_270_trivial_##suffix ( \
            dst + W, \
            dst_stride, \
            src - trailing_pixels * src_stride, \
            src_stride, \
            trailing_pixels, \
            H); \
    } \
} \
 \
static void \
fast_composite_rotate_90_##suffix (pixman_implementation_t *imp, \
                                   pixman_composite_info_t *info) \
{ \
    PIXMAN_COMPOSITE_ARGS (info); \
    pix_type *dst_line; \
    pix_type *src_line; \
    int dst_stride, src_stride; \
    int src_x_t, src_y_t; \
 \
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type, \
                           dst_stride, dst_line, 1); \
    src_x_t = -src_y + pixman_fixed_to_int ( \
                  src_image->common.transform->matrix[0][2] + \
                  pixman_fixed_1 / 2 - pixman_fixed_e) - height; \
    src_y_t = src_x + pixman_fixed_to_int ( \
                  src_image->common.transform->matrix[1][2] + \
                  pixman_fixed_1 / 2 - pixman_fixed_e); \
    PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \
                           src_stride, src_line, 1); \
    blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride, \
                             width, height); \
} \
 \
static void \
fast_composite_rotate_270_##suffix (pixman_implementation_t *imp, \
                                    pixman_composite_info_t *info) \
{ \
    PIXMAN_COMPOSITE_ARGS (info); \
    pix_type *dst_line; \
    pix_type *src_line; \
    int dst_stride, src_stride; \
    int src_x_t, src_y_t; \
 \
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type, \
                           dst_stride, dst_line, 1); \
    src_x_t = src_y + pixman_fixed_to_int ( \
                  src_image->common.transform->matrix[0][2] + \
                  pixman_fixed_1 / 2 - pixman_fixed_e); \
    src_y_t = -src_x + pixman_fixed_to_int ( \
                  src_image->common.transform->matrix[1][2] + \
                  pixman_fixed_1 / 2 - pixman_fixed_e) - width; \
    PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \
                           src_stride, src_line, 1); \
    blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride, \
                              width, height); \
}

FAST_SIMPLE_ROTATE (8, uint8_t)
FAST_SIMPLE_ROTATE (565, uint16_t)
FAST_SIMPLE_ROTATE (8888, uint32_t)

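/*
 * The fast path table: entries are matched in order against the operator,
 * the source/mask/destination formats and the required flags, so more
 * specific entries must come before more general ones.
 */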
2037 static const pixman_fast_path_t c_fast_paths[] =
2038 {
2039 PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
2040 PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
2041 PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
2042 PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
2043 PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
2044 PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
2045 PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
2046 PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
2047 PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
2048 PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
2049 PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
2050 PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
2051 PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, fast_composite_over_n_1_0565),
2052 PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, fast_composite_over_n_1_0565),
2053 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
2054 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
2055 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
2056 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
2057 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
2058 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
2059 PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
2060 PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
2061 PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
2062 PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
2063 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
2064 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
2065 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
2066 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
2067 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
2068 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
    PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, fast_composite_add_0565_0565),
    PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, fast_composite_add_0565_0565),
    PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
    PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
    PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
    PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1_1),
    PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
    PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),

    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),

    SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),

    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888),

    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),

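/* General nearest-neighbour scaling entries: each of these dispatches
 * to the fast_composite_scaled_nearest helper referenced below, rather
 * than to one of the format-specific SIMPLE_NEAREST scanline routines
 * registered above.
 */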
#define NEAREST_FAST_PATH(op,s,d)               \
    { PIXMAN_OP_ ## op,                         \
      PIXMAN_ ## s, SCALED_NEAREST_FLAGS,       \
      PIXMAN_null, 0,                           \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,   \
      fast_composite_scaled_nearest,            \
    }

    NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
    NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),

    NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
    NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),

    NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
    NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),

    NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
    NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),

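/* Unscaled 90/270-degree rotation fast paths.  The flags require the
 * nearest filter, samples that cover the clip, and the matching
 * rotation transform, so no filtering or edge handling is needed.
 */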
#define SIMPLE_ROTATE_FLAGS(angle)                \
    (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM |   \
     FAST_PATH_NEAREST_FILTER                 |   \
     FAST_PATH_SAMPLES_COVER_CLIP_NEAREST     |   \
     FAST_PATH_STANDARD_FLAGS)

#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix)    \
    { PIXMAN_OP_ ## op,                           \
      PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90),     \
      PIXMAN_null, 0,                             \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,     \
      fast_composite_rotate_90_##suffix,          \
    },                                            \
    { PIXMAN_OP_ ## op,                           \
      PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270),    \
      PIXMAN_null, 0,                             \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,     \
      fast_composite_rotate_270_##suffix,         \
    }

    SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
    SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),

    /* Simple repeat fast path entry. */
    { PIXMAN_OP_any,
      PIXMAN_any,
      (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE |
       FAST_PATH_NORMAL_REPEAT),
      PIXMAN_any, 0,
      PIXMAN_any, FAST_PATH_STD_DEST_FLAGS,
      fast_composite_tiled_repeat
    },

    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),

    { PIXMAN_OP_NONE },
};

#ifdef WORDS_BIGENDIAN
#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n)))
#else
#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs))
#endif

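/* Example: A1_FILL_MASK (3, 5) selects three a1 pixels starting at bit
 * offset 5 within a word: ((1U << 3) - 1) << 5 == 0xE0 on little-endian,
 * and 0x7 << (32 - 5 - 3) == 0x07000000 on big-endian.
 */

/* Fill a single scanline of an a1 image: handle the partial leading
 * word, then whole 32-pixel words, then the partial trailing word.
 */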
static force_inline void
pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
{
    if (offs)
    {
        int leading_pixels = 32 - offs;
        if (leading_pixels >= width)
        {
            if (v)
                *dst |= A1_FILL_MASK (width, offs);
            else
                *dst &= ~A1_FILL_MASK (width, offs);
            return;
        }
        else
        {
            if (v)
                *dst++ |= A1_FILL_MASK (leading_pixels, offs);
            else
                *dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
            width -= leading_pixels;
        }
    }
    while (width >= 32)
    {
        if (v)
            *dst++ = 0xFFFFFFFF;
        else
            *dst++ = 0;
        width -= 32;
    }
    if (width > 0)
    {
        if (v)
            *dst |= A1_FILL_MASK (width, 0);
        else
            *dst &= ~A1_FILL_MASK (width, 0);
    }
}

static void
pixman_fill1 (uint32_t *bits,
              int       stride,
              int       x,
              int       y,
              int       width,
              int       height,
              uint32_t  filler)
{
    uint32_t *dst = bits + y * stride + (x >> 5);
    int offs = x & 31;

    if (filler & 1)
    {
        while (height--)
        {
            pixman_fill1_line (dst, offs, width, 1);
            dst += stride;
        }
    }
    else
    {
        while (height--)
        {
            pixman_fill1_line (dst, offs, width, 0);
            dst += stride;
        }
    }
}

static void
pixman_fill8 (uint32_t *bits,
              int       stride,
              int       x,
              int       y,
              int       width,
              int       height,
              uint32_t  filler)
{
    int byte_stride = stride * (int) sizeof (uint32_t);
    uint8_t *dst = (uint8_t *) bits;
    uint8_t v = filler & 0xff;
    int i;

    dst = dst + y * byte_stride + x;

    while (height--)
    {
        for (i = 0; i < width; ++i)
            dst[i] = v;

        dst += byte_stride;
    }
}

static void
pixman_fill16 (uint32_t *bits,
               int       stride,
               int       x,
               int       y,
               int       width,
               int       height,
               uint32_t  filler)
{
    int short_stride =
        (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
    uint16_t *dst = (uint16_t *)bits;
    uint16_t v = filler & 0xffff;
    int i;

    dst = dst + y * short_stride + x;

    while (height--)
    {
        for (i = 0; i < width; ++i)
            dst[i] = v;

        dst += short_stride;
    }
}

static void
pixman_fill32 (uint32_t *bits,
               int       stride,
               int       x,
               int       y,
               int       width,
               int       height,
               uint32_t  filler)
{
    int i;

    bits = bits + y * stride + x;

    while (height--)
    {
        for (i = 0; i < width; ++i)
            bits[i] = filler;

        bits += stride;
    }
}

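/* Fill entry point for this implementation: dispatch on bits per pixel
 * and return FALSE for depths handled elsewhere, so a more general
 * implementation can be tried.
 *
 * Illustrative call (the `bits` and `stride` names are placeholders):
 *
 *     fast_path_fill (imp, bits, stride, 16, 0, 0, 10, 10, 0x001f);
 *
 * would fill a 10x10 region of a 16 bpp image with the low 16 bits of
 * the filler.
 */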
static pixman_bool_t
fast_path_fill (pixman_implementation_t *imp,
                uint32_t *               bits,
                int                      stride,
                int                      bpp,
                int                      x,
                int                      y,
                int                      width,
                int                      height,
                uint32_t                 filler)
{
    switch (bpp)
    {
    case 1:
        pixman_fill1 (bits, stride, x, y, width, height, filler);
        break;

    case 8:
        pixman_fill8 (bits, stride, x, y, width, height, filler);
        break;

    case 16:
        pixman_fill16 (bits, stride, x, y, width, height, filler);
        break;

    case 32:
        pixman_fill32 (bits, stride, x, y, width, height, filler);
        break;

    default:
        return FALSE;
    }

    return TRUE;
}

/*****************************************************************************/

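/* Source scanline fetcher: expands r5g6b5 pixels to a8r8g8b8.  Each
 * channel is widened by replicating its high bits into the low bits
 * (a 5-bit red r becomes (r << 3) | (r >> 2)), and two pixels are
 * converted per 32-bit load once the source pointer is aligned.
 */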
static uint32_t *
fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
{
    int32_t w = iter->width;
    uint32_t *dst = iter->buffer;
    const uint16_t *src = (const uint16_t *)iter->bits;

    iter->bits += iter->stride;

    /* Align the source pointer to a 4-byte boundary */
    if (w > 0 && ((uintptr_t)src & 3))
    {
        *dst++ = convert_0565_to_8888 (*src++);
        w--;
    }
    /* Process two pixels per iteration */
    while ((w -= 2) >= 0)
    {
        uint32_t sr, sb, sg, t0, t1;
        uint32_t s = *(const uint32_t *)src;
        src += 2;
        sr = (s >> 8) & 0x00F800F8;
        sb = (s << 3) & 0x00F800F8;
        sg = (s >> 3) & 0x00FC00FC;
        sr |= sr >> 5;
        sb |= sb >> 5;
        sg |= sg >> 6;
        t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) |
             (sb & 0xFF) | 0xFF000000;
        t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) |
             (sb >> 16) | 0xFF000000;
#ifdef WORDS_BIGENDIAN
        *dst++ = t1;
        *dst++ = t0;
#else
        *dst++ = t0;
        *dst++ = t1;
#endif
    }
    if (w & 1)
    {
        *dst = convert_0565_to_8888 (*src);
    }

    return iter->buffer;
}

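/* Destination fetcher used when the operator ignores the existing
 * destination pixels (both ITER_IGNORE_RGB and ITER_IGNORE_ALPHA are
 * set; see fast_dest_iter_init): skip the read and just advance.
 */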
static uint32_t *
fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask)
{
    iter->bits += iter->stride;
    return iter->buffer;
}

/* Helper function for a workaround that tries to ensure the 0x1F001F
 * constant is kept in a register on RISC architectures.
 */
static force_inline uint32_t
convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F)
{
    uint32_t a, b;
    a = (s >> 3) & x1F001F;
    b = s & 0xFC00;
    a |= a >> 5;
    a |= b >> 5;
    return a;
}

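/* Write the a8r8g8b8 intermediate buffer back out as r5g6b5, four
 * pixels per loop iteration.  iter->bits was already advanced by the
 * matching fetch, so step back one stride to find the scanline.
 */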
static void
fast_write_back_r5g6b5 (pixman_iter_t *iter)
{
    int32_t w = iter->width;
    uint16_t *dst = (uint16_t *)(iter->bits - iter->stride);
    const uint32_t *src = iter->buffer;
    /* Workaround to ensure that the x1F001F variable is allocated in a register */
    static volatile uint32_t volatile_x1F001F = 0x1F001F;
    uint32_t x1F001F = volatile_x1F001F;

    while ((w -= 4) >= 0)
    {
        uint32_t s1 = *src++;
        uint32_t s2 = *src++;
        uint32_t s3 = *src++;
        uint32_t s4 = *src++;
        *dst++ = convert_8888_to_0565_workaround (s1, x1F001F);
        *dst++ = convert_8888_to_0565_workaround (s2, x1F001F);
        *dst++ = convert_8888_to_0565_workaround (s3, x1F001F);
        *dst++ = convert_8888_to_0565_workaround (s4, x1F001F);
    }
    if (w & 2)
    {
        *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
        *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
    }
    if (w & 1)
    {
        *dst = convert_8888_to_0565_workaround (*src, x1F001F);
    }
}

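/* Table mapping image formats to their scanline fetch and write-back
 * routines; only r5g6b5 currently has an entry.
 */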
typedef struct
{
    pixman_format_code_t format;
    pixman_iter_get_scanline_t get_scanline;
    pixman_iter_write_back_t write_back;
} fetcher_info_t;

static const fetcher_info_t fetchers[] =
{
    { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
    { PIXMAN_null }
};

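/* Use the fast fetchers only for untransformed bits images whose
 * samples cover the clip, and only for the narrow (32 bpp) pipeline.
 */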
static pixman_bool_t
fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
    pixman_image_t *image = iter->image;

#define FLAGS                                                           \
    (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |                \
     FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)

    if (iter->iter_flags & ITER_16)
        return FALSE;

    if ((iter->iter_flags & ITER_NARROW) &&
        (iter->image_flags & FLAGS) == FLAGS)
    {
        const fetcher_info_t *f;

        for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
        {
            if (image->common.extended_format_code == f->format)
            {
                uint8_t *b = (uint8_t *)image->bits.bits;
                int s = image->bits.rowstride * 4;

                iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
                iter->stride = s;

                iter->get_scanline = f->get_scanline;
                return TRUE;
            }
        }
    }

    return FALSE;
}

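/* Destination iterators additionally pick a write-back routine, and
 * substitute the no-op fetcher when the operator ignores the existing
 * destination contents.
 */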
static pixman_bool_t
fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
    pixman_image_t *image = iter->image;

    if (iter->iter_flags & ITER_16)
        return FALSE;

    if ((iter->iter_flags & ITER_NARROW) &&
        (iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS)
    {
        const fetcher_info_t *f;

        for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
        {
            if (image->common.extended_format_code == f->format)
            {
                uint8_t *b = (uint8_t *)image->bits.bits;
                int s = image->bits.rowstride * 4;

                iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
                iter->stride = s;

                if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
                    (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
                {
                    iter->get_scanline = fast_dest_fetch_noop;
                }
                else
                {
                    iter->get_scanline = f->get_scanline;
                }
                iter->write_back = f->write_back;
                return TRUE;
            }
        }
    }
    return FALSE;
}

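/* Create the C fast-path implementation: register the fast-path table
 * and hook up the fill and iterator entry points, with anything not
 * covered here delegated to the fallback implementation.
 */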
pixman_implementation_t *
_pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
{
    pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);

    imp->fill = fast_path_fill;
    imp->src_iter_init = fast_src_iter_init;
    imp->dest_iter_init = fast_dest_iter_init;

    return imp;
}
