Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

/*
 * Copyright © 2007 Luca Barbato
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Luca Barbato not be used in advertising or
 * publicity pertaining to distribution of the software without specific,
 * written prior permission. Luca Barbato makes no representations about the
 * suitability of this software for any purpose. It is provided "as is"
 * without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Author: Luca Barbato (lu_zero@gentoo.org)
 *
 * Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell
 */

#include <config.h>
#include "pixman-private.h"
#include "pixman-combine32.h"
#include <altivec.h>

#define AVV(x...) {x}

static force_inline vector unsigned int
splat_alpha (vector unsigned int pix)
{
    return vec_perm (pix, pix,
                     (vector unsigned char)AVV (
                         0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04,
                         0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C));
}
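
/* Illustrative note (not from the original source): splat_alpha () assumes
 * the big-endian layout used by AltiVec-era PowerPC, where byte 0 of each
 * 32-bit ARGB pixel is the alpha byte.  The permute map above replicates that
 * byte into all four channel positions of each of the four pixels, so a pixel
 * 0xAARRGGBB becomes 0xAAAAAAAA in the result vector.
 */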

static force_inline vector unsigned int
pix_multiply (vector unsigned int p, vector unsigned int a)
{
    vector unsigned short hi, lo, mod;

    /* unpack to short */
    hi = (vector unsigned short)
        vec_mergeh ((vector unsigned char)AVV (0),
                    (vector unsigned char)p);

    mod = (vector unsigned short)
        vec_mergeh ((vector unsigned char)AVV (0),
                    (vector unsigned char)a);

    hi = vec_mladd (hi, mod, (vector unsigned short)
                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
                         0x0080, 0x0080, 0x0080, 0x0080));

    hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));

    hi = vec_sr (hi, vec_splat_u16 (8));

    /* unpack to short */
    lo = (vector unsigned short)
        vec_mergel ((vector unsigned char)AVV (0),
                    (vector unsigned char)p);

    mod = (vector unsigned short)
        vec_mergel ((vector unsigned char)AVV (0),
                    (vector unsigned char)a);

    lo = vec_mladd (lo, mod, (vector unsigned short)
                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
                         0x0080, 0x0080, 0x0080, 0x0080));

    lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));

    lo = vec_sr (lo, vec_splat_u16 (8));

    return (vector unsigned int)vec_packsu (hi, lo);
}
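
/* Illustrative sketch (not from the original source): per 8-bit channel the
 * sequence above computes the usual rounded approximation of (p * a) / 255,
 * the same one pixman's scalar macros use:
 *
 *     t = p * a + 0x80;
 *     result = (t + (t >> 8)) >> 8;
 *
 * vec_mladd performs the multiply-and-add-0x80 on eight 16-bit lanes at a
 * time, and vec_packsu saturates the 16-bit results back down to bytes.
 */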

static force_inline vector unsigned int
pix_add (vector unsigned int a, vector unsigned int b)
{
    return (vector unsigned int)vec_adds ((vector unsigned char)a,
                                          (vector unsigned char)b);
}

static force_inline vector unsigned int
pix_add_mul (vector unsigned int x,
             vector unsigned int a,
             vector unsigned int y,
             vector unsigned int b)
{
    vector unsigned int t1, t2;

    t1 = pix_multiply (x, a);
    t2 = pix_multiply (y, b);

    return pix_add (t1, t2);
}

static force_inline vector unsigned int
negate (vector unsigned int src)
{
    return vec_nor (src, src);
}
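
/* Illustrative note (not from the original source): vec_nor (src, src) is the
 * bitwise complement of src, so for an 8-bit channel value x it yields
 * 255 - x.  The combiners below rely on this to build "one minus alpha"
 * factors such as splat_alpha (negate (vsrc)).
 */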

/* dest*~srca + src */
static force_inline vector unsigned int
over (vector unsigned int src,
      vector unsigned int srca,
      vector unsigned int dest)
{
    vector unsigned char tmp = (vector unsigned char)
        pix_multiply (dest, negate (srca));

    tmp = vec_adds ((vector unsigned char)src, tmp);
    return (vector unsigned int)tmp;
}
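
/* Illustrative sketch (not from the original source): with premultiplied
 * pixels this is the Porter-Duff OVER operator, roughly, per 8-bit channel:
 *
 *     dest = src + dest * (255 - src_alpha) / 255;
 *
 * srca is expected to already hold the source alpha replicated into every
 * channel (see splat_alpha), and the final vec_adds saturates at 255.
 */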

/* in == pix_multiply */
#define in_over(src, srca, mask, dest) \
    over (pix_multiply (src, mask), \
          pix_multiply (srca, mask), dest)

#define COMPUTE_SHIFT_MASK(source) \
    source ## _mask = vec_lvsl (0, source);

#define COMPUTE_SHIFT_MASKS(dest, source) \
    dest ## _mask = vec_lvsl (0, dest); \
    source ## _mask = vec_lvsl (0, source); \
    store_mask = vec_lvsr (0, dest);

#define COMPUTE_SHIFT_MASKC(dest, source, mask) \
    mask ## _mask = vec_lvsl (0, mask); \
    dest ## _mask = vec_lvsl (0, dest); \
    source ## _mask = vec_lvsl (0, source); \
    store_mask = vec_lvsr (0, dest);
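
/* Illustrative note (not from the original source): vec_lvsl and vec_lvsr
 * return permute maps derived from the low four bits of a pointer.  Classic
 * AltiVec loads and stores ignore those bits, so the code below loads the two
 * aligned vectors that straddle an unaligned address and uses the vec_lvsl
 * map with vec_perm to assemble the intended 16 bytes; the vec_lvsr map is
 * used by STORE_VECTOR to scatter the result back out again.
 */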

/* notice you have to declare temp vars...
 * Note: tmp3 and tmp4 must remain untouched!
 */

#define LOAD_VECTORS(dest, source) \
    tmp1 = (typeof(tmp1))vec_ld (0, source); \
    tmp2 = (typeof(tmp2))vec_ld (15, source); \
    tmp3 = (typeof(tmp3))vec_ld (0, dest); \
    v ## source = (typeof(v ## source)) \
        vec_perm (tmp1, tmp2, source ## _mask); \
    tmp4 = (typeof(tmp4))vec_ld (15, dest); \
    v ## dest = (typeof(v ## dest)) \
        vec_perm (tmp3, tmp4, dest ## _mask);
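
/* Illustrative note (not from the original source): LOAD_VECTORS reads one
 * possibly unaligned vector of four pixels from `source` and one from `dest`,
 * leaving them in v<source>/v<dest>.  tmp3 and tmp4 keep the raw aligned
 * destination vectors so STORE_VECTOR can later merge the result back without
 * clobbering the bytes outside the four pixels being processed.
 */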

#define LOAD_VECTORSC(dest, source, mask) \
    tmp1 = (typeof(tmp1))vec_ld (0, source); \
    tmp2 = (typeof(tmp2))vec_ld (15, source); \
    tmp3 = (typeof(tmp3))vec_ld (0, dest); \
    v ## source = (typeof(v ## source)) \
        vec_perm (tmp1, tmp2, source ## _mask); \
    tmp4 = (typeof(tmp4))vec_ld (15, dest); \
    tmp1 = (typeof(tmp1))vec_ld (0, mask); \
    v ## dest = (typeof(v ## dest)) \
        vec_perm (tmp3, tmp4, dest ## _mask); \
    tmp2 = (typeof(tmp2))vec_ld (15, mask); \
    v ## mask = (typeof(v ## mask)) \
        vec_perm (tmp1, tmp2, mask ## _mask);

#define LOAD_VECTORSM(dest, source, mask) \
    LOAD_VECTORSC (dest, source, mask) \
    v ## source = pix_multiply (v ## source, \
                                splat_alpha (v ## mask));

#define STORE_VECTOR(dest) \
    edges = vec_perm (tmp4, tmp3, dest ## _mask); \
    tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \
    tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \
    vec_st ((vector unsigned int) tmp3, 15, dest); \
    vec_st ((vector unsigned int) tmp1, 0, dest);
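
/* Illustrative note (not from the original source): STORE_VECTOR is the
 * write-back half of the unaligned scheme.  `edges` recovers the bytes of the
 * two aligned destination vectors (tmp3/tmp4, preserved by LOAD_VECTORS*)
 * that do not belong to the four pixels being written; the result is then
 * permuted with the vec_lvsr map and written back as two aligned vec_st
 * operations, so neighbouring memory is left untouched.
 */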

static void
vmx_combine_over_u_no_mask (uint32_t * dest,
                            const uint32_t *src,
                            int width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = over (vsrc, splat_alpha (vsrc), vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);

        dest[i] = d;
    }
}
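
/* Illustrative note (not from the original source): each combiner below
 * follows the same shape as the function above: a vector loop that handles
 * four pixels per iteration, followed by a scalar tail that reuses pixman's
 * UN8x4_* macros for the remaining width % 4 pixels, so both paths compute
 * the same result.
 */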

static void
vmx_combine_over_u_mask (uint32_t * dest,
                         const uint32_t *src,
                         const uint32_t *mask,
                         int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = over (vsrc, splat_alpha (vsrc), vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia;

        UN8x4_MUL_UN8 (s, m);

        ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
        dest[i] = d;
    }
}

static void
vmx_combine_over_u (pixman_implementation_t *imp,
                    pixman_op_t op,
                    uint32_t * dest,
                    const uint32_t * src,
                    const uint32_t * mask,
                    int width)
{
    if (mask)
        vmx_combine_over_u_mask (dest, src, mask, width);
    else
        vmx_combine_over_u_no_mask (dest, src, width);
}

static void
vmx_combine_over_reverse_u_no_mask (uint32_t * dest,
                                    const uint32_t *src,
                                    int width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = over (vdest, splat_alpha (vdest), vsrc);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
        dest[i] = s;
    }
}

static void
vmx_combine_over_reverse_u_mask (uint32_t * dest,
                                 const uint32_t *src,
                                 const uint32_t *mask,
                                 int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = over (vdest, splat_alpha (vdest), vsrc);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8 (s, m);

        UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
        dest[i] = s;
    }
}

static void
vmx_combine_over_reverse_u (pixman_implementation_t *imp,
                            pixman_op_t op,
                            uint32_t * dest,
                            const uint32_t * src,
                            const uint32_t * mask,
                            int width)
{
    if (mask)
        vmx_combine_over_reverse_u_mask (dest, src, mask, width);
    else
        vmx_combine_over_reverse_u_no_mask (dest, src, width);
}

static void
vmx_combine_in_u_no_mask (uint32_t * dest,
                          const uint32_t *src,
                          int width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_multiply (vsrc, splat_alpha (vdest));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (dest[i]);

        UN8x4_MUL_UN8 (s, a);
        dest[i] = s;
    }
}

static void
vmx_combine_in_u_mask (uint32_t * dest,
                       const uint32_t *src,
                       const uint32_t *mask,
                       int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_multiply (vsrc, splat_alpha (vdest));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (dest[i]);

        UN8x4_MUL_UN8 (s, m);
        UN8x4_MUL_UN8 (s, a);

        dest[i] = s;
    }
}

static void
vmx_combine_in_u (pixman_implementation_t *imp,
                  pixman_op_t op,
                  uint32_t * dest,
                  const uint32_t * src,
                  const uint32_t * mask,
                  int width)
{
    if (mask)
        vmx_combine_in_u_mask (dest, src, mask, width);
    else
        vmx_combine_in_u_no_mask (dest, src, width);
}

static void
vmx_combine_in_reverse_u_no_mask (uint32_t * dest,
                                  const uint32_t *src,
                                  int width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_multiply (vdest, splat_alpha (vsrc));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t d = dest[i];
        uint32_t a = ALPHA_8 (src[i]);

        UN8x4_MUL_UN8 (d, a);

        dest[i] = d;
    }
}

static void
vmx_combine_in_reverse_u_mask (uint32_t * dest,
                               const uint32_t *src,
                               const uint32_t *mask,
                               int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_multiply (vdest, splat_alpha (vsrc));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t d = dest[i];
        uint32_t a = src[i];

        UN8x4_MUL_UN8 (a, m);
        a = ALPHA_8 (a);
        UN8x4_MUL_UN8 (d, a);

        dest[i] = d;
    }
}

static void
vmx_combine_in_reverse_u (pixman_implementation_t *imp,
                          pixman_op_t op,
                          uint32_t * dest,
                          const uint32_t * src,
                          const uint32_t * mask,
                          int width)
{
    if (mask)
        vmx_combine_in_reverse_u_mask (dest, src, mask, width);
    else
        vmx_combine_in_reverse_u_no_mask (dest, src, width);
}

static void
vmx_combine_out_u_no_mask (uint32_t * dest,
                           const uint32_t *src,
                           int width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8 (s, a);

        dest[i] = s;
    }
}

static void
vmx_combine_out_u_mask (uint32_t * dest,
                        const uint32_t *src,
                        const uint32_t *mask,
                        int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8 (s, m);
        UN8x4_MUL_UN8 (s, a);

        dest[i] = s;
    }
}

static void
vmx_combine_out_u (pixman_implementation_t *imp,
                   pixman_op_t op,
                   uint32_t * dest,
                   const uint32_t * src,
                   const uint32_t * mask,
                   int width)
{
    if (mask)
        vmx_combine_out_u_mask (dest, src, mask, width);
    else
        vmx_combine_out_u_no_mask (dest, src, width);
}

static void
vmx_combine_out_reverse_u_no_mask (uint32_t * dest,
                                   const uint32_t *src,
                                   int width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t d = dest[i];
        uint32_t a = ALPHA_8 (~src[i]);

        UN8x4_MUL_UN8 (d, a);

        dest[i] = d;
    }
}

static void
vmx_combine_out_reverse_u_mask (uint32_t * dest,
                                const uint32_t *src,
                                const uint32_t *mask,
                                int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t d = dest[i];
        uint32_t a = src[i];

        UN8x4_MUL_UN8 (a, m);
        a = ALPHA_8 (~a);
        UN8x4_MUL_UN8 (d, a);

        dest[i] = d;
    }
}

static void
vmx_combine_out_reverse_u (pixman_implementation_t *imp,
                           pixman_op_t op,
                           uint32_t * dest,
                           const uint32_t * src,
                           const uint32_t * mask,
                           int width)
{
    if (mask)
        vmx_combine_out_reverse_u_mask (dest, src, mask, width);
    else
        vmx_combine_out_reverse_u_no_mask (dest, src, width);
}

static void
vmx_combine_atop_u_no_mask (uint32_t * dest,
                            const uint32_t *src,
                            int width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
                             vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t dest_a = ALPHA_8 (d);
        uint32_t src_ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);

        dest[i] = s;
    }
}

static void
vmx_combine_atop_u_mask (uint32_t * dest,
                         const uint32_t *src,
                         const uint32_t *mask,
                         int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
                             vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t dest_a = ALPHA_8 (d);
        uint32_t src_ia;

        UN8x4_MUL_UN8 (s, m);

        src_ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);

        dest[i] = s;
    }
}

static void
vmx_combine_atop_u (pixman_implementation_t *imp,
                    pixman_op_t op,
                    uint32_t * dest,
                    const uint32_t * src,
                    const uint32_t * mask,
                    int width)
{
    if (mask)
        vmx_combine_atop_u_mask (dest, src, mask, width);
    else
        vmx_combine_atop_u_no_mask (dest, src, width);
}

static void
vmx_combine_atop_reverse_u_no_mask (uint32_t * dest,
                                    const uint32_t *src,
                                    int width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_add_mul (vdest, splat_alpha (vsrc),
                             vsrc, splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_a = ALPHA_8 (s);
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);

        dest[i] = s;
    }
}

static void
vmx_combine_atop_reverse_u_mask (uint32_t * dest,
                                 const uint32_t *src,
                                 const uint32_t *mask,
                                 int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_add_mul (vdest, splat_alpha (vsrc),
                             vsrc, splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_a;
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8 (s, m);

        src_a = ALPHA_8 (s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);

        dest[i] = s;
    }
}

static void
vmx_combine_atop_reverse_u (pixman_implementation_t *imp,
                            pixman_op_t op,
                            uint32_t * dest,
                            const uint32_t * src,
                            const uint32_t * mask,
                            int width)
{
    if (mask)
        vmx_combine_atop_reverse_u_mask (dest, src, mask, width);
    else
        vmx_combine_atop_reverse_u_no_mask (dest, src, width);
}

static void
vmx_combine_xor_u_no_mask (uint32_t * dest,
                           const uint32_t *src,
                           int width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
                             vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_ia = ALPHA_8 (~s);
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);

        dest[i] = s;
    }
}

static void
vmx_combine_xor_u_mask (uint32_t * dest,
                        const uint32_t *src,
                        const uint32_t *mask,
                        int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
                             vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_ia;
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8 (s, m);

        src_ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);

        dest[i] = s;
    }
}

static void
vmx_combine_xor_u (pixman_implementation_t *imp,
                   pixman_op_t op,
                   uint32_t * dest,
                   const uint32_t * src,
                   const uint32_t * mask,
                   int width)
{
    if (mask)
        vmx_combine_xor_u_mask (dest, src, mask, width);
    else
        vmx_combine_xor_u_no_mask (dest, src, width);
}

static void
vmx_combine_add_u_no_mask (uint32_t * dest,
                           const uint32_t *src,
                           int width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);
    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_add (vsrc, vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];

        UN8x4_ADD_UN8x4 (d, s);

        dest[i] = d;
    }
}

static void
vmx_combine_add_u_mask (uint32_t * dest,
                        const uint32_t *src,
                        const uint32_t *mask,
                        int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_add (vsrc, vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];

        UN8x4_MUL_UN8 (s, m);
        UN8x4_ADD_UN8x4 (d, s);

        dest[i] = d;
    }
}

static void
vmx_combine_add_u (pixman_implementation_t *imp,
                   pixman_op_t op,
                   uint32_t * dest,
                   const uint32_t * src,
                   const uint32_t * mask,
                   int width)
{
    if (mask)
        vmx_combine_add_u_mask (dest, src, mask, width);
    else
        vmx_combine_add_u_no_mask (dest, src, width);
}

static void
vmx_combine_src_ca (pixman_implementation_t *imp,
                    pixman_op_t op,
                    uint32_t * dest,
                    const uint32_t * src,
                    const uint32_t * mask,
                    int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (vsrc, vmask);

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];

        UN8x4_MUL_UN8x4 (s, a);

        dest[i] = s;
    }
}
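
/* Illustrative note (not from the original source): the *_ca combiners from
 * here on are the component-alpha variants, where the mask carries an
 * independent 8-bit value per colour channel rather than a single alpha.
 * They therefore load the mask with LOAD_VECTORSC and multiply it in
 * per channel with pix_multiply, instead of reducing it to splat_alpha as
 * the unified (*_u) combiners above do.
 */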

static void
vmx_combine_over_ca (pixman_implementation_t *imp,
                     pixman_op_t op,
                     uint32_t * dest,
                     const uint32_t * src,
                     const uint32_t * mask,
                     int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest);

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s);

        dest[i] = d;
    }
}

static void
vmx_combine_over_reverse_ca (pixman_implementation_t *imp,
                             pixman_op_t op,
                             uint32_t * dest,
                             const uint32_t * src,
                             const uint32_t * mask,
                             int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask));

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ida = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d);

        dest[i] = s;
    }
}

static void
vmx_combine_in_ca (pixman_implementation_t *imp,
                   pixman_op_t op,
                   uint32_t * dest,
                   const uint32_t * src,
                   const uint32_t * mask,
                   int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t da = ALPHA_8 (dest[i]);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (s, da);

        dest[i] = s;
    }
}

static void
vmx_combine_in_reverse_ca (pixman_implementation_t *imp,
                           pixman_op_t op,
                           uint32_t * dest,
                           const uint32_t * src,
                           const uint32_t * mask,
                           int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (src[i]);

        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4 (d, a);

        dest[i] = d;
    }
}

static void
vmx_combine_out_ca (pixman_implementation_t *imp,
                    pixman_op_t op,
                    uint32_t * dest,
                    const uint32_t * src,
                    const uint32_t * mask,
                    int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (
            pix_multiply (vsrc, vmask), splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t da = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (s, da);

        dest[i] = s;
    }
}

static void
vmx_combine_out_reverse_ca (pixman_implementation_t *imp,
                            pixman_op_t op,
                            uint32_t * dest,
                            const uint32_t * src,
                            const uint32_t * mask,
                            int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (
            vdest, negate (pix_multiply (vmask, splat_alpha (vsrc))));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);

        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4 (d, ~a);

        dest[i] = d;
    }
}

static void
vmx_combine_atop_ca (pixman_implementation_t *imp,
                     pixman_op_t op,
                     uint32_t * dest,
                     const uint32_t * src,
                     const uint32_t * mask,
                     int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask, vsrca;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vsrca = splat_alpha (vsrc);

        vsrc = pix_multiply (vsrc, vmask);
        vmask = pix_multiply (vmask, vsrca);

        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
                             negate (vmask), vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);
        uint32_t da = ALPHA_8 (d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);

        dest[i] = d;
    }
}

static void
vmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
                             pixman_op_t op,
                             uint32_t * dest,
                             const uint32_t * src,
                             const uint32_t * mask,
                             int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_add_mul (vdest,
                             pix_multiply (vmask, splat_alpha (vsrc)),
                             pix_multiply (vsrc, vmask),
                             negate (splat_alpha (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);
        uint32_t da = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da);

        dest[i] = d;
    }
}

static void
vmx_combine_xor_ca (pixman_implementation_t *imp,
                    pixman_op_t op,
                    uint32_t * dest,
                    const uint32_t * src,
                    const uint32_t * mask,
                    int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_add_mul (vdest,
                             negate (pix_multiply (vmask, splat_alpha (vsrc))),
                             pix_multiply (vsrc, vmask),
                             negate (splat_alpha (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);
        uint32_t da = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);

        dest[i] = d;
    }
}

static void
vmx_combine_add_ca (pixman_implementation_t *imp,
                    pixman_op_t op,
                    uint32_t * dest,
                    const uint32_t * src,
                    const uint32_t * mask,
                    int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_add (pix_multiply (vsrc, vmask), vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_ADD_UN8x4 (s, d);

        dest[i] = s;
    }
}

static const pixman_fast_path_t vmx_fast_paths[] =
{
    { PIXMAN_OP_NONE },
};
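
/* Illustrative note (not from the original source): the fast-path table is
 * empty apart from the PIXMAN_OP_NONE sentinel, so this implementation only
 * accelerates the combiner stage registered below; whole-composite fast paths
 * are presumably left to the fallback implementation.
 */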

pixman_implementation_t *
_pixman_implementation_create_vmx (pixman_implementation_t *fallback)
{
    pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths);

    /* Set up function pointers */
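
    /* Illustrative note (not from the original source): combine_32 holds the
     * "unified" combiners, where an optional mask is reduced to its alpha
     * channel, while combine_32_ca holds the component-alpha variants that
     * keep a separate mask value per colour channel.  Operators not assigned
     * here are assumed to fall through to the fallback implementation passed
     * in above.
     */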

    imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u;
    imp->combine_32[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_u;
    imp->combine_32[PIXMAN_OP_IN] = vmx_combine_in_u;
    imp->combine_32[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_u;
    imp->combine_32[PIXMAN_OP_OUT] = vmx_combine_out_u;
    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_u;
    imp->combine_32[PIXMAN_OP_ATOP] = vmx_combine_atop_u;
    imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_u;
    imp->combine_32[PIXMAN_OP_XOR] = vmx_combine_xor_u;

    imp->combine_32[PIXMAN_OP_ADD] = vmx_combine_add_u;

    imp->combine_32_ca[PIXMAN_OP_SRC] = vmx_combine_src_ca;
    imp->combine_32_ca[PIXMAN_OP_OVER] = vmx_combine_over_ca;
    imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_IN] = vmx_combine_in_ca;
    imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_OUT] = vmx_combine_out_ca;
    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_ATOP] = vmx_combine_atop_ca;
    imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca;
    imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca;

    return imp;
}