gfx/cairo/libpixman/src/pixman-vmx.c

1 /*
2 * Copyright © 2007 Luca Barbato
3 *
4 * Permission to use, copy, modify, distribute, and sell this software and its
5 * documentation for any purpose is hereby granted without fee, provided that
6 * the above copyright notice appear in all copies and that both that
7 * copyright notice and this permission notice appear in supporting
8 * documentation, and that the name of Luca Barbato not be used in advertising or
9 * publicity pertaining to distribution of the software without specific,
10 * written prior permission. Luca Barbato makes no representations about the
11 * suitability of this software for any purpose. It is provided "as is"
12 * without express or implied warranty.
13 *
14 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
15 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
16 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
17 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
19 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
20 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
21 * SOFTWARE.
22 *
23 * Author: Luca Barbato (lu_zero@gentoo.org)
24 *
25 * Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell
26 */
27
28 #include <config.h>
29 #include "pixman-private.h"
30 #include "pixman-combine32.h"
31 #include <altivec.h>
32
33 #define AVV(x...) {x}
34
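/* Each 32-bit pixel packs 8-bit A,R,G,B components (the AVV macro simply
 * wraps a brace-enclosed vector literal).  splat_alpha uses vec_perm to
 * replicate byte 0 of every 32-bit element -- the alpha byte for pixman's
 * packed ARGB pixels on this big-endian PowerPC target -- into all four
 * bytes of that element, producing a vector of per-pixel alphas suitable
 * for per-channel multiplication. */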
35 static force_inline vector unsigned int
36 splat_alpha (vector unsigned int pix)
37 {
38 return vec_perm (pix, pix,
39 (vector unsigned char)AVV (
40 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04,
41 0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C));
42 }
43
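/* Per-channel multiply of two vectors of 8-bit components, approximating
 * (p * a) / 255 for each byte: each half is widened to 16 bits by merging
 * with zero, multiplied with a 0x0080 rounding bias via vec_mladd, has its
 * own high byte folded back in, and is shifted right by 8 before the two
 * halves are re-packed to bytes. */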
44 static force_inline vector unsigned int
45 pix_multiply (vector unsigned int p, vector unsigned int a)
46 {
47 vector unsigned short hi, lo, mod;
48
49 /* unpack to short */
50 hi = (vector unsigned short)
51 vec_mergeh ((vector unsigned char)AVV (0),
52 (vector unsigned char)p);
53
54 mod = (vector unsigned short)
55 vec_mergeh ((vector unsigned char)AVV (0),
56 (vector unsigned char)a);
57
58 hi = vec_mladd (hi, mod, (vector unsigned short)
59 AVV (0x0080, 0x0080, 0x0080, 0x0080,
60 0x0080, 0x0080, 0x0080, 0x0080));
61
62 hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));
63
64 hi = vec_sr (hi, vec_splat_u16 (8));
65
66 /* unpack to short */
67 lo = (vector unsigned short)
68 vec_mergel ((vector unsigned char)AVV (0),
69 (vector unsigned char)p);
70 mod = (vector unsigned short)
71 vec_mergel ((vector unsigned char)AVV (0),
72 (vector unsigned char)a);
73
74 lo = vec_mladd (lo, mod, (vector unsigned short)
75 AVV (0x0080, 0x0080, 0x0080, 0x0080,
76 0x0080, 0x0080, 0x0080, 0x0080));
77
78 lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));
79
80 lo = vec_sr (lo, vec_splat_u16 (8));
81
82 return (vector unsigned int)vec_packsu (hi, lo);
83 }
84
85 static force_inline vector unsigned int
86 pix_add (vector unsigned int a, vector unsigned int b)
87 {
88 return (vector unsigned int)vec_adds ((vector unsigned char)a,
89 (vector unsigned char)b);
90 }
91
92 static force_inline vector unsigned int
93 pix_add_mul (vector unsigned int x,
94 vector unsigned int a,
95 vector unsigned int y,
96 vector unsigned int b)
97 {
98 vector unsigned int t1, t2;
99
100 t1 = pix_multiply (x, a);
101 t2 = pix_multiply (y, b);
102
103 return pix_add (t1, t2);
104 }
105
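/* Bitwise complement of every byte; for 8-bit components this is 255 - x,
 * i.e. the inverse alpha/color used by the OUT, ATOP and XOR operators. */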
106 static force_inline vector unsigned int
107 negate (vector unsigned int src)
108 {
109 return vec_nor (src, src);
110 }
111
112 /* dest*~srca + src */
113 static force_inline vector unsigned int
114 over (vector unsigned int src,
115 vector unsigned int srca,
116 vector unsigned int dest)
117 {
118 vector unsigned char tmp = (vector unsigned char)
119 pix_multiply (dest, negate (srca));
120
121 tmp = vec_adds ((vector unsigned char)src, tmp);
122 return (vector unsigned int)tmp;
123 }
124
125 /* in == pix_multiply; in_over (src, srca, mask, dest) computes (src IN mask) OVER dest */
126 #define in_over(src, srca, mask, dest) \
127 over (pix_multiply (src, mask), \
128 pix_multiply (srca, mask), dest)
129
130
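/* The pointers handed to the combiners are not guaranteed to be 16-byte
 * aligned, so these macros precompute vec_lvsl/vec_lvsr permute vectors
 * that the load/store macros below use to realign the data. */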
131 #define COMPUTE_SHIFT_MASK(source) \
132 source ## _mask = vec_lvsl (0, source);
133
134 #define COMPUTE_SHIFT_MASKS(dest, source) \
135 dest ## _mask = vec_lvsl (0, dest); \
136 source ## _mask = vec_lvsl (0, source); \
137 store_mask = vec_lvsr (0, dest);
138
139 #define COMPUTE_SHIFT_MASKC(dest, source, mask) \
140 mask ## _mask = vec_lvsl (0, mask); \
141 dest ## _mask = vec_lvsl (0, dest); \
142 source ## _mask = vec_lvsl (0, source); \
143 store_mask = vec_lvsr (0, dest);
144
145 /* The caller must declare the temporary vectors these macros use (tmp1-tmp4,
146 * edges and the *_mask / store_mask variables).  Note: tmp3 and tmp4 must remain untouched!
147 */
148
149 #define LOAD_VECTORS(dest, source) \
150 tmp1 = (typeof(tmp1))vec_ld (0, source); \
151 tmp2 = (typeof(tmp2))vec_ld (15, source); \
152 tmp3 = (typeof(tmp3))vec_ld (0, dest); \
153 v ## source = (typeof(v ## source)) \
154 vec_perm (tmp1, tmp2, source ## _mask); \
155 tmp4 = (typeof(tmp4))vec_ld (15, dest); \
156 v ## dest = (typeof(v ## dest)) \
157 vec_perm (tmp3, tmp4, dest ## _mask);
158
159 #define LOAD_VECTORSC(dest, source, mask) \
160 tmp1 = (typeof(tmp1))vec_ld (0, source); \
161 tmp2 = (typeof(tmp2))vec_ld (15, source); \
162 tmp3 = (typeof(tmp3))vec_ld (0, dest); \
163 v ## source = (typeof(v ## source)) \
164 vec_perm (tmp1, tmp2, source ## _mask); \
165 tmp4 = (typeof(tmp4))vec_ld (15, dest); \
166 tmp1 = (typeof(tmp1))vec_ld (0, mask); \
167 v ## dest = (typeof(v ## dest)) \
168 vec_perm (tmp3, tmp4, dest ## _mask); \
169 tmp2 = (typeof(tmp2))vec_ld (15, mask); \
170 v ## mask = (typeof(v ## mask)) \
171 vec_perm (tmp1, tmp2, mask ## _mask);
172
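/* LOAD_VECTORSM is the unified-alpha variant: it loads dest, source and
 * mask like LOAD_VECTORSC, then scales the source by the mask's splatted
 * alpha, so the rest of the combiner can ignore the mask. */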
173 #define LOAD_VECTORSM(dest, source, mask) \
174 LOAD_VECTORSC (dest, source, mask) \
175 v ## source = pix_multiply (v ## source, \
176 splat_alpha (v ## mask));
177
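/* STORE_VECTOR writes the result back as a read-modify-write: the
 * destination bytes that fall outside the 16-byte span being written
 * (held in tmp3/tmp4 from the load macros) are merged back in via the
 * store permute, so neighbouring memory is preserved. */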
178 #define STORE_VECTOR(dest) \
179 edges = vec_perm (tmp4, tmp3, dest ## _mask); \
180 tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \
181 tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \
182 vec_st ((vector unsigned int) tmp3, 15, dest); \
183 vec_st ((vector unsigned int) tmp1, 0, dest);
184
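/* All combiners below follow the same pattern: the main loop processes
 * four pixels per iteration with the AltiVec helpers above, and a scalar
 * tail loop handles the remaining width % 4 pixels using the UN8x4_*
 * macros from pixman-combine32.h. */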
185 static void
186 vmx_combine_over_u_no_mask (uint32_t * dest,
187 const uint32_t *src,
188 int width)
189 {
190 int i;
191 vector unsigned int vdest, vsrc;
192 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
193 dest_mask, src_mask, store_mask;
194
195 COMPUTE_SHIFT_MASKS (dest, src);
196
197 /* printf ("%s\n",__PRETTY_FUNCTION__); */
198 for (i = width / 4; i > 0; i--)
199 {
200
201 LOAD_VECTORS (dest, src);
202
203 vdest = over (vsrc, splat_alpha (vsrc), vdest);
204
205 STORE_VECTOR (dest);
206
207 src += 4;
208 dest += 4;
209 }
210
211 for (i = width % 4; --i >= 0;)
212 {
213 uint32_t s = src[i];
214 uint32_t d = dest[i];
215 uint32_t ia = ALPHA_8 (~s);
216
217 UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
218
219 dest[i] = d;
220 }
221 }
222
223 static void
224 vmx_combine_over_u_mask (uint32_t * dest,
225 const uint32_t *src,
226 const uint32_t *mask,
227 int width)
228 {
229 int i;
230 vector unsigned int vdest, vsrc, vmask;
231 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
232 dest_mask, src_mask, mask_mask, store_mask;
233
234 COMPUTE_SHIFT_MASKC (dest, src, mask);
235
236 /* printf ("%s\n",__PRETTY_FUNCTION__); */
237 for (i = width / 4; i > 0; i--)
238 {
239 LOAD_VECTORSM (dest, src, mask);
240
241 vdest = over (vsrc, splat_alpha (vsrc), vdest);
242
243 STORE_VECTOR (dest);
244
245 src += 4;
246 dest += 4;
247 mask += 4;
248 }
249
250 for (i = width % 4; --i >= 0;)
251 {
252 uint32_t m = ALPHA_8 (mask[i]);
253 uint32_t s = src[i];
254 uint32_t d = dest[i];
255 uint32_t ia;
256
257 UN8x4_MUL_UN8 (s, m);
258
259 ia = ALPHA_8 (~s);
260
261 UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
262 dest[i] = d;
263 }
264 }
265
266 static void
267 vmx_combine_over_u (pixman_implementation_t *imp,
268 pixman_op_t op,
269 uint32_t * dest,
270 const uint32_t * src,
271 const uint32_t * mask,
272 int width)
273 {
274 if (mask)
275 vmx_combine_over_u_mask (dest, src, mask, width);
276 else
277 vmx_combine_over_u_no_mask (dest, src, width);
278 }
279
280 static void
281 vmx_combine_over_reverse_u_no_mask (uint32_t * dest,
282 const uint32_t *src,
283 int width)
284 {
285 int i;
286 vector unsigned int vdest, vsrc;
287 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
288 dest_mask, src_mask, store_mask;
289
290 COMPUTE_SHIFT_MASKS (dest, src);
291
292 /* printf ("%s\n",__PRETTY_FUNCTION__); */
293 for (i = width / 4; i > 0; i--)
294 {
295
296 LOAD_VECTORS (dest, src);
297
298 vdest = over (vdest, splat_alpha (vdest), vsrc);
299
300 STORE_VECTOR (dest);
301
302 src += 4;
303 dest += 4;
304 }
305
306 for (i = width % 4; --i >= 0;)
307 {
308 uint32_t s = src[i];
309 uint32_t d = dest[i];
310 uint32_t ia = ALPHA_8 (~dest[i]);
311
312 UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
313 dest[i] = s;
314 }
315 }
316
317 static void
318 vmx_combine_over_reverse_u_mask (uint32_t * dest,
319 const uint32_t *src,
320 const uint32_t *mask,
321 int width)
322 {
323 int i;
324 vector unsigned int vdest, vsrc, vmask;
325 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
326 dest_mask, src_mask, mask_mask, store_mask;
327
328 COMPUTE_SHIFT_MASKC (dest, src, mask);
329
330 /* printf ("%s\n",__PRETTY_FUNCTION__); */
331 for (i = width / 4; i > 0; i--)
332 {
333
334 LOAD_VECTORSM (dest, src, mask);
335
336 vdest = over (vdest, splat_alpha (vdest), vsrc);
337
338 STORE_VECTOR (dest);
339
340 src += 4;
341 dest += 4;
342 mask += 4;
343 }
344
345 for (i = width % 4; --i >= 0;)
346 {
347 uint32_t m = ALPHA_8 (mask[i]);
348 uint32_t s = src[i];
349 uint32_t d = dest[i];
350 uint32_t ia = ALPHA_8 (~dest[i]);
351
352 UN8x4_MUL_UN8 (s, m);
353
354 UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
355 dest[i] = s;
356 }
357 }
358
359 static void
360 vmx_combine_over_reverse_u (pixman_implementation_t *imp,
361 pixman_op_t op,
362 uint32_t * dest,
363 const uint32_t * src,
364 const uint32_t * mask,
365 int width)
366 {
367 if (mask)
368 vmx_combine_over_reverse_u_mask (dest, src, mask, width);
369 else
370 vmx_combine_over_reverse_u_no_mask (dest, src, width);
371 }
372
373 static void
374 vmx_combine_in_u_no_mask (uint32_t * dest,
375 const uint32_t *src,
376 int width)
377 {
378 int i;
379 vector unsigned int vdest, vsrc;
380 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
381 dest_mask, src_mask, store_mask;
382
383 COMPUTE_SHIFT_MASKS (dest, src);
384
385 /* printf ("%s\n",__PRETTY_FUNCTION__); */
386 for (i = width / 4; i > 0; i--)
387 {
388 LOAD_VECTORS (dest, src);
389
390 vdest = pix_multiply (vsrc, splat_alpha (vdest));
391
392 STORE_VECTOR (dest);
393
394 src += 4;
395 dest += 4;
396 }
397
398 for (i = width % 4; --i >= 0;)
399 {
400 uint32_t s = src[i];
401 uint32_t a = ALPHA_8 (dest[i]);
402
403 UN8x4_MUL_UN8 (s, a);
404 dest[i] = s;
405 }
406 }
407
408 static void
409 vmx_combine_in_u_mask (uint32_t * dest,
410 const uint32_t *src,
411 const uint32_t *mask,
412 int width)
413 {
414 int i;
415 vector unsigned int vdest, vsrc, vmask;
416 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
417 dest_mask, src_mask, mask_mask, store_mask;
418
419 COMPUTE_SHIFT_MASKC (dest, src, mask);
420
421 /* printf ("%s\n",__PRETTY_FUNCTION__); */
422 for (i = width / 4; i > 0; i--)
423 {
424 LOAD_VECTORSM (dest, src, mask);
425
426 vdest = pix_multiply (vsrc, splat_alpha (vdest));
427
428 STORE_VECTOR (dest);
429
430 src += 4;
431 dest += 4;
432 mask += 4;
433 }
434
435 for (i = width % 4; --i >= 0;)
436 {
437 uint32_t m = ALPHA_8 (mask[i]);
438 uint32_t s = src[i];
439 uint32_t a = ALPHA_8 (dest[i]);
440
441 UN8x4_MUL_UN8 (s, m);
442 UN8x4_MUL_UN8 (s, a);
443
444 dest[i] = s;
445 }
446 }
447
448 static void
449 vmx_combine_in_u (pixman_implementation_t *imp,
450 pixman_op_t op,
451 uint32_t * dest,
452 const uint32_t * src,
453 const uint32_t * mask,
454 int width)
455 {
456 if (mask)
457 vmx_combine_in_u_mask (dest, src, mask, width);
458 else
459 vmx_combine_in_u_no_mask (dest, src, width);
460 }
461
462 static void
463 vmx_combine_in_reverse_u_no_mask (uint32_t * dest,
464 const uint32_t *src,
465 int width)
466 {
467 int i;
468 vector unsigned int vdest, vsrc;
469 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
470 dest_mask, src_mask, store_mask;
471
472 COMPUTE_SHIFT_MASKS (dest, src);
473
474 /* printf ("%s\n",__PRETTY_FUNCTION__); */
475 for (i = width / 4; i > 0; i--)
476 {
477 LOAD_VECTORS (dest, src);
478
479 vdest = pix_multiply (vdest, splat_alpha (vsrc));
480
481 STORE_VECTOR (dest);
482
483 src += 4;
484 dest += 4;
485 }
486
487 for (i = width % 4; --i >= 0;)
488 {
489 uint32_t d = dest[i];
490 uint32_t a = ALPHA_8 (src[i]);
491
492 UN8x4_MUL_UN8 (d, a);
493
494 dest[i] = d;
495 }
496 }
497
498 static void
499 vmx_combine_in_reverse_u_mask (uint32_t * dest,
500 const uint32_t *src,
501 const uint32_t *mask,
502 int width)
503 {
504 int i;
505 vector unsigned int vdest, vsrc, vmask;
506 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
507 dest_mask, src_mask, mask_mask, store_mask;
508
509 COMPUTE_SHIFT_MASKC (dest, src, mask);
510
511 /* printf ("%s\n",__PRETTY_FUNCTION__); */
512 for (i = width / 4; i > 0; i--)
513 {
514 LOAD_VECTORSM (dest, src, mask);
515
516 vdest = pix_multiply (vdest, splat_alpha (vsrc));
517
518 STORE_VECTOR (dest);
519
520 src += 4;
521 dest += 4;
522 mask += 4;
523 }
524
525 for (i = width % 4; --i >= 0;)
526 {
527 uint32_t m = ALPHA_8 (mask[i]);
528 uint32_t d = dest[i];
529 uint32_t a = src[i];
530
531 UN8x4_MUL_UN8 (a, m);
532 a = ALPHA_8 (a);
533 UN8x4_MUL_UN8 (d, a);
534
535 dest[i] = d;
536 }
537 }
538
539 static void
540 vmx_combine_in_reverse_u (pixman_implementation_t *imp,
541 pixman_op_t op,
542 uint32_t * dest,
543 const uint32_t * src,
544 const uint32_t * mask,
545 int width)
546 {
547 if (mask)
548 vmx_combine_in_reverse_u_mask (dest, src, mask, width);
549 else
550 vmx_combine_in_reverse_u_no_mask (dest, src, width);
551 }
552
553 static void
554 vmx_combine_out_u_no_mask (uint32_t * dest,
555 const uint32_t *src,
556 int width)
557 {
558 int i;
559 vector unsigned int vdest, vsrc;
560 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
561 dest_mask, src_mask, store_mask;
562
563 COMPUTE_SHIFT_MASKS (dest, src);
564
565 /* printf ("%s\n",__PRETTY_FUNCTION__); */
566 for (i = width / 4; i > 0; i--)
567 {
568 LOAD_VECTORS (dest, src);
569
570 vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));
571
572 STORE_VECTOR (dest);
573
574 src += 4;
575 dest += 4;
576 }
577
578 for (i = width % 4; --i >= 0;)
579 {
580 uint32_t s = src[i];
581 uint32_t a = ALPHA_8 (~dest[i]);
582
583 UN8x4_MUL_UN8 (s, a);
584
585 dest[i] = s;
586 }
587 }
588
589 static void
590 vmx_combine_out_u_mask (uint32_t * dest,
591 const uint32_t *src,
592 const uint32_t *mask,
593 int width)
594 {
595 int i;
596 vector unsigned int vdest, vsrc, vmask;
597 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
598 dest_mask, src_mask, mask_mask, store_mask;
599
600 COMPUTE_SHIFT_MASKC (dest, src, mask);
601
602 /* printf ("%s\n",__PRETTY_FUNCTION__); */
603 for (i = width / 4; i > 0; i--)
604 {
605 LOAD_VECTORSM (dest, src, mask);
606
607 vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));
608
609 STORE_VECTOR (dest);
610
611 src += 4;
612 dest += 4;
613 mask += 4;
614 }
615
616 for (i = width % 4; --i >= 0;)
617 {
618 uint32_t m = ALPHA_8 (mask[i]);
619 uint32_t s = src[i];
620 uint32_t a = ALPHA_8 (~dest[i]);
621
622 UN8x4_MUL_UN8 (s, m);
623 UN8x4_MUL_UN8 (s, a);
624
625 dest[i] = s;
626 }
627 }
628
629 static void
630 vmx_combine_out_u (pixman_implementation_t *imp,
631 pixman_op_t op,
632 uint32_t * dest,
633 const uint32_t * src,
634 const uint32_t * mask,
635 int width)
636 {
637 if (mask)
638 vmx_combine_out_u_mask (dest, src, mask, width);
639 else
640 vmx_combine_out_u_no_mask (dest, src, width);
641 }
642
643 static void
644 vmx_combine_out_reverse_u_no_mask (uint32_t * dest,
645 const uint32_t *src,
646 int width)
647 {
648 int i;
649 vector unsigned int vdest, vsrc;
650 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
651 dest_mask, src_mask, store_mask;
652
653 COMPUTE_SHIFT_MASKS (dest, src);
654
655 /* printf ("%s\n",__PRETTY_FUNCTION__); */
656 for (i = width / 4; i > 0; i--)
657 {
658
659 LOAD_VECTORS (dest, src);
660
661 vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));
662
663 STORE_VECTOR (dest);
664
665 src += 4;
666 dest += 4;
667 }
668
669 for (i = width % 4; --i >= 0;)
670 {
671 uint32_t d = dest[i];
672 uint32_t a = ALPHA_8 (~src[i]);
673
674 UN8x4_MUL_UN8 (d, a);
675
676 dest[i] = d;
677 }
678 }
679
680 static void
681 vmx_combine_out_reverse_u_mask (uint32_t * dest,
682 const uint32_t *src,
683 const uint32_t *mask,
684 int width)
685 {
686 int i;
687 vector unsigned int vdest, vsrc, vmask;
688 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
689 dest_mask, src_mask, mask_mask, store_mask;
690
691 COMPUTE_SHIFT_MASKC (dest, src, mask);
692
693 /* printf ("%s\n",__PRETTY_FUNCTION__); */
694 for (i = width / 4; i > 0; i--)
695 {
696 LOAD_VECTORSM (dest, src, mask);
697
698 vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));
699
700 STORE_VECTOR (dest);
701
702 src += 4;
703 dest += 4;
704 mask += 4;
705 }
706
707 for (i = width % 4; --i >= 0;)
708 {
709 uint32_t m = ALPHA_8 (mask[i]);
710 uint32_t d = dest[i];
711 uint32_t a = src[i];
712
713 UN8x4_MUL_UN8 (a, m);
714 a = ALPHA_8 (~a);
715 UN8x4_MUL_UN8 (d, a);
716
717 dest[i] = d;
718 }
719 }
720
721 static void
722 vmx_combine_out_reverse_u (pixman_implementation_t *imp,
723 pixman_op_t op,
724 uint32_t * dest,
725 const uint32_t * src,
726 const uint32_t * mask,
727 int width)
728 {
729 if (mask)
730 vmx_combine_out_reverse_u_mask (dest, src, mask, width);
731 else
732 vmx_combine_out_reverse_u_no_mask (dest, src, width);
733 }
734
735 static void
736 vmx_combine_atop_u_no_mask (uint32_t * dest,
737 const uint32_t *src,
738 int width)
739 {
740 int i;
741 vector unsigned int vdest, vsrc;
742 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
743 dest_mask, src_mask, store_mask;
744
745 COMPUTE_SHIFT_MASKS (dest, src);
746
747 /* printf ("%s\n",__PRETTY_FUNCTION__); */
748 for (i = width / 4; i > 0; i--)
749 {
750 LOAD_VECTORS (dest, src);
751
752 vdest = pix_add_mul (vsrc, splat_alpha (vdest),
753 vdest, splat_alpha (negate (vsrc)));
754
755 STORE_VECTOR (dest);
756
757 src += 4;
758 dest += 4;
759 }
760
761 for (i = width % 4; --i >= 0;)
762 {
763 uint32_t s = src[i];
764 uint32_t d = dest[i];
765 uint32_t dest_a = ALPHA_8 (d);
766 uint32_t src_ia = ALPHA_8 (~s);
767
768 UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
769
770 dest[i] = s;
771 }
772 }
773
774 static void
775 vmx_combine_atop_u_mask (uint32_t * dest,
776 const uint32_t *src,
777 const uint32_t *mask,
778 int width)
779 {
780 int i;
781 vector unsigned int vdest, vsrc, vmask;
782 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
783 dest_mask, src_mask, mask_mask, store_mask;
784
785 COMPUTE_SHIFT_MASKC (dest, src, mask);
786
787 /* printf ("%s\n",__PRETTY_FUNCTION__); */
788 for (i = width / 4; i > 0; i--)
789 {
790 LOAD_VECTORSM (dest, src, mask);
791
792 vdest = pix_add_mul (vsrc, splat_alpha (vdest),
793 vdest, splat_alpha (negate (vsrc)));
794
795 STORE_VECTOR (dest);
796
797 src += 4;
798 dest += 4;
799 mask += 4;
800 }
801
802 for (i = width % 4; --i >= 0;)
803 {
804 uint32_t m = ALPHA_8 (mask[i]);
805 uint32_t s = src[i];
806 uint32_t d = dest[i];
807 uint32_t dest_a = ALPHA_8 (d);
808 uint32_t src_ia;
809
810 UN8x4_MUL_UN8 (s, m);
811
812 src_ia = ALPHA_8 (~s);
813
814 UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
815
816 dest[i] = s;
817 }
818 }
819
820 static void
821 vmx_combine_atop_u (pixman_implementation_t *imp,
822 pixman_op_t op,
823 uint32_t * dest,
824 const uint32_t * src,
825 const uint32_t * mask,
826 int width)
827 {
828 if (mask)
829 vmx_combine_atop_u_mask (dest, src, mask, width);
830 else
831 vmx_combine_atop_u_no_mask (dest, src, width);
832 }
833
834 static void
835 vmx_combine_atop_reverse_u_no_mask (uint32_t * dest,
836 const uint32_t *src,
837 int width)
838 {
839 int i;
840 vector unsigned int vdest, vsrc;
841 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
842 dest_mask, src_mask, store_mask;
843
844 COMPUTE_SHIFT_MASKS (dest, src);
845
846 /* printf ("%s\n",__PRETTY_FUNCTION__); */
847 for (i = width / 4; i > 0; i--)
848 {
849 LOAD_VECTORS (dest, src);
850
851 vdest = pix_add_mul (vdest, splat_alpha (vsrc),
852 vsrc, splat_alpha (negate (vdest)));
853
854 STORE_VECTOR (dest);
855
856 src += 4;
857 dest += 4;
858 }
859
860 for (i = width % 4; --i >= 0;)
861 {
862 uint32_t s = src[i];
863 uint32_t d = dest[i];
864 uint32_t src_a = ALPHA_8 (s);
865 uint32_t dest_ia = ALPHA_8 (~d);
866
867 UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
868
869 dest[i] = s;
870 }
871 }
872
873 static void
874 vmx_combine_atop_reverse_u_mask (uint32_t * dest,
875 const uint32_t *src,
876 const uint32_t *mask,
877 int width)
878 {
879 int i;
880 vector unsigned int vdest, vsrc, vmask;
881 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
882 dest_mask, src_mask, mask_mask, store_mask;
883
884 COMPUTE_SHIFT_MASKC (dest, src, mask);
885
886 /* printf ("%s\n",__PRETTY_FUNCTION__); */
887 for (i = width / 4; i > 0; i--)
888 {
889 LOAD_VECTORSM (dest, src, mask);
890
891 vdest = pix_add_mul (vdest, splat_alpha (vsrc),
892 vsrc, splat_alpha (negate (vdest)));
893
894 STORE_VECTOR (dest);
895
896 src += 4;
897 dest += 4;
898 mask += 4;
899 }
900
901 for (i = width % 4; --i >= 0;)
902 {
903 uint32_t m = ALPHA_8 (mask[i]);
904 uint32_t s = src[i];
905 uint32_t d = dest[i];
906 uint32_t src_a;
907 uint32_t dest_ia = ALPHA_8 (~d);
908
909 UN8x4_MUL_UN8 (s, m);
910
911 src_a = ALPHA_8 (s);
912
913 UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
914
915 dest[i] = s;
916 }
917 }
918
919 static void
920 vmx_combine_atop_reverse_u (pixman_implementation_t *imp,
921 pixman_op_t op,
922 uint32_t * dest,
923 const uint32_t * src,
924 const uint32_t * mask,
925 int width)
926 {
927 if (mask)
928 vmx_combine_atop_reverse_u_mask (dest, src, mask, width);
929 else
930 vmx_combine_atop_reverse_u_no_mask (dest, src, width);
931 }
932
933 static void
934 vmx_combine_xor_u_no_mask (uint32_t * dest,
935 const uint32_t *src,
936 int width)
937 {
938 int i;
939 vector unsigned int vdest, vsrc;
940 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
941 dest_mask, src_mask, store_mask;
942
943 COMPUTE_SHIFT_MASKS (dest, src);
944
945 /* printf ("%s\n",__PRETTY_FUNCTION__); */
946 for (i = width / 4; i > 0; i--)
947 {
948 LOAD_VECTORS (dest, src);
949
950 vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
951 vdest, splat_alpha (negate (vsrc)));
952
953 STORE_VECTOR (dest);
954
955 src += 4;
956 dest += 4;
957 }
958
959 for (i = width % 4; --i >= 0;)
960 {
961 uint32_t s = src[i];
962 uint32_t d = dest[i];
963 uint32_t src_ia = ALPHA_8 (~s);
964 uint32_t dest_ia = ALPHA_8 (~d);
965
966 UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
967
968 dest[i] = s;
969 }
970 }
971
972 static void
973 vmx_combine_xor_u_mask (uint32_t * dest,
974 const uint32_t *src,
975 const uint32_t *mask,
976 int width)
977 {
978 int i;
979 vector unsigned int vdest, vsrc, vmask;
980 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
981 dest_mask, src_mask, mask_mask, store_mask;
982
983 COMPUTE_SHIFT_MASKC (dest, src, mask);
984
985 /* printf ("%s\n",__PRETTY_FUNCTION__); */
986 for (i = width / 4; i > 0; i--)
987 {
988 LOAD_VECTORSM (dest, src, mask);
989
990 vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
991 vdest, splat_alpha (negate (vsrc)));
992
993 STORE_VECTOR (dest);
994
995 src += 4;
996 dest += 4;
997 mask += 4;
998 }
999
1000 for (i = width % 4; --i >= 0;)
1001 {
1002 uint32_t m = ALPHA_8 (mask[i]);
1003 uint32_t s = src[i];
1004 uint32_t d = dest[i];
1005 uint32_t src_ia;
1006 uint32_t dest_ia = ALPHA_8 (~d);
1007
1008 UN8x4_MUL_UN8 (s, m);
1009
1010 src_ia = ALPHA_8 (~s);
1011
1012 UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
1013
1014 dest[i] = s;
1015 }
1016 }
1017
1018 static void
1019 vmx_combine_xor_u (pixman_implementation_t *imp,
1020 pixman_op_t op,
1021 uint32_t * dest,
1022 const uint32_t * src,
1023 const uint32_t * mask,
1024 int width)
1025 {
1026 if (mask)
1027 vmx_combine_xor_u_mask (dest, src, mask, width);
1028 else
1029 vmx_combine_xor_u_no_mask (dest, src, width);
1030 }
1031
1032 static void
1033 vmx_combine_add_u_no_mask (uint32_t * dest,
1034 const uint32_t *src,
1035 int width)
1036 {
1037 int i;
1038 vector unsigned int vdest, vsrc;
1039 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
1040 dest_mask, src_mask, store_mask;
1041
1042 COMPUTE_SHIFT_MASKS (dest, src);
1043 /* printf ("%s\n",__PRETTY_FUNCTION__); */
1044 for (i = width / 4; i > 0; i--)
1045 {
1046 LOAD_VECTORS (dest, src);
1047
1048 vdest = pix_add (vsrc, vdest);
1049
1050 STORE_VECTOR (dest);
1051
1052 src += 4;
1053 dest += 4;
1054 }
1055
1056 for (i = width % 4; --i >= 0;)
1057 {
1058 uint32_t s = src[i];
1059 uint32_t d = dest[i];
1060
1061 UN8x4_ADD_UN8x4 (d, s);
1062
1063 dest[i] = d;
1064 }
1065 }
1066
1067 static void
1068 vmx_combine_add_u_mask (uint32_t * dest,
1069 const uint32_t *src,
1070 const uint32_t *mask,
1071 int width)
1072 {
1073 int i;
1074 vector unsigned int vdest, vsrc, vmask;
1075 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
1076 dest_mask, src_mask, mask_mask, store_mask;
1077
1078 COMPUTE_SHIFT_MASKC (dest, src, mask);
1079
1080 /* printf ("%s\n",__PRETTY_FUNCTION__); */
1081 for (i = width / 4; i > 0; i--)
1082 {
1083 LOAD_VECTORSM (dest, src, mask);
1084
1085 vdest = pix_add (vsrc, vdest);
1086
1087 STORE_VECTOR (dest);
1088
1089 src += 4;
1090 dest += 4;
1091 mask += 4;
1092 }
1093
1094 for (i = width % 4; --i >= 0;)
1095 {
1096 uint32_t m = ALPHA_8 (mask[i]);
1097 uint32_t s = src[i];
1098 uint32_t d = dest[i];
1099
1100 UN8x4_MUL_UN8 (s, m);
1101 UN8x4_ADD_UN8x4 (d, s);
1102
1103 dest[i] = d;
1104 }
1105 }
1106
1107 static void
1108 vmx_combine_add_u (pixman_implementation_t *imp,
1109 pixman_op_t op,
1110 uint32_t * dest,
1111 const uint32_t * src,
1112 const uint32_t * mask,
1113 int width)
1114 {
1115 if (mask)
1116 vmx_combine_add_u_mask (dest, src, mask, width);
1117 else
1118 vmx_combine_add_u_no_mask (dest, src, width);
1119 }
1120
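/* The *_ca combiners implement component-alpha compositing: the mask
 * supplies a separate alpha value per color channel, so the full mask
 * vector is kept (LOAD_VECTORSC) instead of being reduced to its alpha
 * channel as in the unified-alpha (_u) paths above. */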
1121 static void
1122 vmx_combine_src_ca (pixman_implementation_t *imp,
1123 pixman_op_t op,
1124 uint32_t * dest,
1125 const uint32_t * src,
1126 const uint32_t * mask,
1127 int width)
1128 {
1129 int i;
1130 vector unsigned int vdest, vsrc, vmask;
1131 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
1132 dest_mask, mask_mask, src_mask, store_mask;
1133
1134 COMPUTE_SHIFT_MASKC (dest, src, mask);
1135
1136 /* printf ("%s\n",__PRETTY_FUNCTION__); */
1137 for (i = width / 4; i > 0; i--)
1138 {
1139 LOAD_VECTORSC (dest, src, mask);
1140
1141 vdest = pix_multiply (vsrc, vmask);
1142
1143 STORE_VECTOR (dest);
1144
1145 mask += 4;
1146 src += 4;
1147 dest += 4;
1148 }
1149
1150 for (i = width % 4; --i >= 0;)
1151 {
1152 uint32_t a = mask[i];
1153 uint32_t s = src[i];
1154
1155 UN8x4_MUL_UN8x4 (s, a);
1156
1157 dest[i] = s;
1158 }
1159 }
1160
1161 static void
1162 vmx_combine_over_ca (pixman_implementation_t *imp,
1163 pixman_op_t op,
1164 uint32_t * dest,
1165 const uint32_t * src,
1166 const uint32_t * mask,
1167 int width)
1168 {
1169 int i;
1170 vector unsigned int vdest, vsrc, vmask;
1171 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
1172 dest_mask, mask_mask, src_mask, store_mask;
1173
1174 COMPUTE_SHIFT_MASKC (dest, src, mask);
1175
1176 /* printf ("%s\n",__PRETTY_FUNCTION__); */
1177 for (i = width / 4; i > 0; i--)
1178 {
1179 LOAD_VECTORSC (dest, src, mask);
1180
1181 vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest);
1182
1183 STORE_VECTOR (dest);
1184
1185 mask += 4;
1186 src += 4;
1187 dest += 4;
1188 }
1189
1190 for (i = width % 4; --i >= 0;)
1191 {
1192 uint32_t a = mask[i];
1193 uint32_t s = src[i];
1194 uint32_t d = dest[i];
1195 uint32_t sa = ALPHA_8 (s);
1196
1197 UN8x4_MUL_UN8x4 (s, a);
1198 UN8x4_MUL_UN8 (a, sa);
1199 UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s);
1200
1201 dest[i] = d;
1202 }
1203 }
1204
1205 static void
1206 vmx_combine_over_reverse_ca (pixman_implementation_t *imp,
1207 pixman_op_t op,
1208 uint32_t * dest,
1209 const uint32_t * src,
1210 const uint32_t * mask,
1211 int width)
1212 {
1213 int i;
1214 vector unsigned int vdest, vsrc, vmask;
1215 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
1216 dest_mask, mask_mask, src_mask, store_mask;
1217
1218 COMPUTE_SHIFT_MASKC (dest, src, mask);
1219
1220 /* printf("%s\n",__PRETTY_FUNCTION__); */
1221 for (i = width / 4; i > 0; i--)
1222 {
1223 LOAD_VECTORSC (dest, src, mask);
1224
1225 vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask));
1226
1227 STORE_VECTOR (dest);
1228
1229 mask += 4;
1230 src += 4;
1231 dest += 4;
1232 }
1233
1234 for (i = width % 4; --i >= 0;)
1235 {
1236 uint32_t a = mask[i];
1237 uint32_t s = src[i];
1238 uint32_t d = dest[i];
1239 uint32_t ida = ALPHA_8 (~d);
1240
1241 UN8x4_MUL_UN8x4 (s, a);
1242 UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d);
1243
1244 dest[i] = s;
1245 }
1246 }
1247
1248 static void
1249 vmx_combine_in_ca (pixman_implementation_t *imp,
1250 pixman_op_t op,
1251 uint32_t * dest,
1252 const uint32_t * src,
1253 const uint32_t * mask,
1254 int width)
1255 {
1256 int i;
1257 vector unsigned int vdest, vsrc, vmask;
1258 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
1259 dest_mask, mask_mask, src_mask, store_mask;
1260
1261 COMPUTE_SHIFT_MASKC (dest, src, mask);
1262
1263 /* printf ("%s\n",__PRETTY_FUNCTION__); */
1264 for (i = width / 4; i > 0; i--)
1265 {
1266 LOAD_VECTORSC (dest, src, mask);
1267
1268 vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest));
1269
1270 STORE_VECTOR (dest);
1271
1272 src += 4;
1273 dest += 4;
1274 mask += 4;
1275 }
1276
1277 for (i = width % 4; --i >= 0;)
1278 {
1279 uint32_t a = mask[i];
1280 uint32_t s = src[i];
1281 uint32_t da = ALPHA_8 (dest[i]);
1282
1283 UN8x4_MUL_UN8x4 (s, a);
1284 UN8x4_MUL_UN8 (s, da);
1285
1286 dest[i] = s;
1287 }
1288 }
1289
1290 static void
1291 vmx_combine_in_reverse_ca (pixman_implementation_t *imp,
1292 pixman_op_t op,
1293 uint32_t * dest,
1294 const uint32_t * src,
1295 const uint32_t * mask,
1296 int width)
1297 {
1298 int i;
1299 vector unsigned int vdest, vsrc, vmask;
1300 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
1301 dest_mask, mask_mask, src_mask, store_mask;
1302
1303 COMPUTE_SHIFT_MASKC (dest, src, mask);
1304
1305 /* printf ("%s\n",__PRETTY_FUNCTION__); */
1306 for (i = width / 4; i > 0; i--)
1307 {
1308
1309 LOAD_VECTORSC (dest, src, mask);
1310
1311 vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc)));
1312
1313 STORE_VECTOR (dest);
1314
1315 src += 4;
1316 dest += 4;
1317 mask += 4;
1318 }
1319
1320 for (i = width % 4; --i >= 0;)
1321 {
1322 uint32_t a = mask[i];
1323 uint32_t d = dest[i];
1324 uint32_t sa = ALPHA_8 (src[i]);
1325
1326 UN8x4_MUL_UN8 (a, sa);
1327 UN8x4_MUL_UN8x4 (d, a);
1328
1329 dest[i] = d;
1330 }
1331 }
1332
1333 static void
1334 vmx_combine_out_ca (pixman_implementation_t *imp,
1335 pixman_op_t op,
1336 uint32_t * dest,
1337 const uint32_t * src,
1338 const uint32_t * mask,
1339 int width)
1340 {
1341 int i;
1342 vector unsigned int vdest, vsrc, vmask;
1343 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
1344 dest_mask, mask_mask, src_mask, store_mask;
1345
1346 COMPUTE_SHIFT_MASKC (dest, src, mask);
1347
1348 /* printf ("%s\n",__PRETTY_FUNCTION__); */
1349 for (i = width / 4; i > 0; i--)
1350 {
1351 LOAD_VECTORSC (dest, src, mask);
1352
1353 vdest = pix_multiply (
1354 pix_multiply (vsrc, vmask), splat_alpha (negate (vdest)));
1355
1356 STORE_VECTOR (dest);
1357
1358 src += 4;
1359 dest += 4;
1360 mask += 4;
1361 }
1362
1363 for (i = width % 4; --i >= 0;)
1364 {
1365 uint32_t a = mask[i];
1366 uint32_t s = src[i];
1367 uint32_t d = dest[i];
1368 uint32_t da = ALPHA_8 (~d);
1369
1370 UN8x4_MUL_UN8x4 (s, a);
1371 UN8x4_MUL_UN8 (s, da);
1372
1373 dest[i] = s;
1374 }
1375 }
1376
1377 static void
1378 vmx_combine_out_reverse_ca (pixman_implementation_t *imp,
1379 pixman_op_t op,
1380 uint32_t * dest,
1381 const uint32_t * src,
1382 const uint32_t * mask,
1383 int width)
1384 {
1385 int i;
1386 vector unsigned int vdest, vsrc, vmask;
1387 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
1388 dest_mask, mask_mask, src_mask, store_mask;
1389
1390 COMPUTE_SHIFT_MASKC (dest, src, mask);
1391
1392 /* printf ("%s\n",__PRETTY_FUNCTION__); */
1393 for (i = width / 4; i > 0; i--)
1394 {
1395 LOAD_VECTORSC (dest, src, mask);
1396
1397 vdest = pix_multiply (
1398 vdest, negate (pix_multiply (vmask, splat_alpha (vsrc))));
1399
1400 STORE_VECTOR (dest);
1401
1402 src += 4;
1403 dest += 4;
1404 mask += 4;
1405 }
1406
1407 for (i = width % 4; --i >= 0;)
1408 {
1409 uint32_t a = mask[i];
1410 uint32_t s = src[i];
1411 uint32_t d = dest[i];
1412 uint32_t sa = ALPHA_8 (s);
1413
1414 UN8x4_MUL_UN8 (a, sa);
1415 UN8x4_MUL_UN8x4 (d, ~a);
1416
1417 dest[i] = d;
1418 }
1419 }
1420
1421 static void
1422 vmx_combine_atop_ca (pixman_implementation_t *imp,
1423 pixman_op_t op,
1424 uint32_t * dest,
1425 const uint32_t * src,
1426 const uint32_t * mask,
1427 int width)
1428 {
1429 int i;
1430 vector unsigned int vdest, vsrc, vmask, vsrca;
1431 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
1432 dest_mask, mask_mask, src_mask, store_mask;
1433
1434 COMPUTE_SHIFT_MASKC (dest, src, mask);
1435
1436 /* printf ("%s\n",__PRETTY_FUNCTION__); */
1437 for (i = width / 4; i > 0; i--)
1438 {
1439 LOAD_VECTORSC (dest, src, mask);
1440
1441 vsrca = splat_alpha (vsrc);
1442
1443 vsrc = pix_multiply (vsrc, vmask);
1444 vmask = pix_multiply (vmask, vsrca);
1445
1446 vdest = pix_add_mul (vsrc, splat_alpha (vdest),
1447 negate (vmask), vdest);
1448
1449 STORE_VECTOR (dest);
1450
1451 src += 4;
1452 dest += 4;
1453 mask += 4;
1454 }
1455
1456 for (i = width % 4; --i >= 0;)
1457 {
1458 uint32_t a = mask[i];
1459 uint32_t s = src[i];
1460 uint32_t d = dest[i];
1461 uint32_t sa = ALPHA_8 (s);
1462 uint32_t da = ALPHA_8 (d);
1463
1464 UN8x4_MUL_UN8x4 (s, a);
1465 UN8x4_MUL_UN8 (a, sa);
1466 UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
1467
1468 dest[i] = d;
1469 }
1470 }
1471
1472 static void
1473 vmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
1474 pixman_op_t op,
1475 uint32_t * dest,
1476 const uint32_t * src,
1477 const uint32_t * mask,
1478 int width)
1479 {
1480 int i;
1481 vector unsigned int vdest, vsrc, vmask;
1482 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
1483 dest_mask, mask_mask, src_mask, store_mask;
1484
1485 COMPUTE_SHIFT_MASKC (dest, src, mask);
1486
1487 /* printf ("%s\n",__PRETTY_FUNCTION__); */
1488 for (i = width / 4; i > 0; i--)
1489 {
1490 LOAD_VECTORSC (dest, src, mask);
1491
1492 vdest = pix_add_mul (vdest,
1493 pix_multiply (vmask, splat_alpha (vsrc)),
1494 pix_multiply (vsrc, vmask),
1495 negate (splat_alpha (vdest)));
1496
1497 STORE_VECTOR (dest);
1498
1499 src += 4;
1500 dest += 4;
1501 mask += 4;
1502 }
1503
1504 for (i = width % 4; --i >= 0;)
1505 {
1506 uint32_t a = mask[i];
1507 uint32_t s = src[i];
1508 uint32_t d = dest[i];
1509 uint32_t sa = ALPHA_8 (s);
1510 uint32_t da = ALPHA_8 (~d);
1511
1512 UN8x4_MUL_UN8x4 (s, a);
1513 UN8x4_MUL_UN8 (a, sa);
1514 UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da);
1515
1516 dest[i] = d;
1517 }
1518 }
1519
1520 static void
1521 vmx_combine_xor_ca (pixman_implementation_t *imp,
1522 pixman_op_t op,
1523 uint32_t * dest,
1524 const uint32_t * src,
1525 const uint32_t * mask,
1526 int width)
1527 {
1528 int i;
1529 vector unsigned int vdest, vsrc, vmask;
1530 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
1531 dest_mask, mask_mask, src_mask, store_mask;
1532
1533 COMPUTE_SHIFT_MASKC (dest, src, mask);
1534
1535 /* printf ("%s\n",__PRETTY_FUNCTION__); */
1536 for (i = width / 4; i > 0; i--)
1537 {
1538 LOAD_VECTORSC (dest, src, mask);
1539
1540 vdest = pix_add_mul (vdest,
1541 negate (pix_multiply (vmask, splat_alpha (vsrc))),
1542 pix_multiply (vsrc, vmask),
1543 negate (splat_alpha (vdest)));
1544
1545 STORE_VECTOR (dest);
1546
1547 src += 4;
1548 dest += 4;
1549 mask += 4;
1550 }
1551
1552 for (i = width % 4; --i >= 0;)
1553 {
1554 uint32_t a = mask[i];
1555 uint32_t s = src[i];
1556 uint32_t d = dest[i];
1557 uint32_t sa = ALPHA_8 (s);
1558 uint32_t da = ALPHA_8 (~d);
1559
1560 UN8x4_MUL_UN8x4 (s, a);
1561 UN8x4_MUL_UN8 (a, sa);
1562 UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
1563
1564 dest[i] = d;
1565 }
1566 }
1567
1568 static void
1569 vmx_combine_add_ca (pixman_implementation_t *imp,
1570 pixman_op_t op,
1571 uint32_t * dest,
1572 const uint32_t * src,
1573 const uint32_t * mask,
1574 int width)
1575 {
1576 int i;
1577 vector unsigned int vdest, vsrc, vmask;
1578 vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
1579 dest_mask, mask_mask, src_mask, store_mask;
1580
1581 COMPUTE_SHIFT_MASKC (dest, src, mask);
1582
1583 /* printf ("%s\n",__PRETTY_FUNCTION__); */
1584 for (i = width / 4; i > 0; i--)
1585 {
1586 LOAD_VECTORSC (dest, src, mask);
1587
1588 vdest = pix_add (pix_multiply (vsrc, vmask), vdest);
1589
1590 STORE_VECTOR (dest);
1591
1592 src += 4;
1593 dest += 4;
1594 mask += 4;
1595 }
1596
1597 for (i = width % 4; --i >= 0;)
1598 {
1599 uint32_t a = mask[i];
1600 uint32_t s = src[i];
1601 uint32_t d = dest[i];
1602
1603 UN8x4_MUL_UN8x4 (s, a);
1604 UN8x4_ADD_UN8x4 (s, d);
1605
1606 dest[i] = s;
1607 }
1608 }
1609
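/* No whole-operation fast paths are provided; the table holds only the
 * PIXMAN_OP_NONE terminator, so this implementation accelerates just the
 * combiner entry points registered below. */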
1610 static const pixman_fast_path_t vmx_fast_paths[] =
1611 {
1612 { PIXMAN_OP_NONE },
1613 };
1614
1615 pixman_implementation_t *
1616 _pixman_implementation_create_vmx (pixman_implementation_t *fallback)
1617 {
1618 pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths);
1619
1620 /* Set up function pointers */
1621
1622 imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u;
1623 imp->combine_32[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_u;
1624 imp->combine_32[PIXMAN_OP_IN] = vmx_combine_in_u;
1625 imp->combine_32[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_u;
1626 imp->combine_32[PIXMAN_OP_OUT] = vmx_combine_out_u;
1627 imp->combine_32[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_u;
1628 imp->combine_32[PIXMAN_OP_ATOP] = vmx_combine_atop_u;
1629 imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_u;
1630 imp->combine_32[PIXMAN_OP_XOR] = vmx_combine_xor_u;
1631
1632 imp->combine_32[PIXMAN_OP_ADD] = vmx_combine_add_u;
1633
1634 imp->combine_32_ca[PIXMAN_OP_SRC] = vmx_combine_src_ca;
1635 imp->combine_32_ca[PIXMAN_OP_OVER] = vmx_combine_over_ca;
1636 imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_ca;
1637 imp->combine_32_ca[PIXMAN_OP_IN] = vmx_combine_in_ca;
1638 imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_ca;
1639 imp->combine_32_ca[PIXMAN_OP_OUT] = vmx_combine_out_ca;
1640 imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_ca;
1641 imp->combine_32_ca[PIXMAN_OP_ATOP] = vmx_combine_atop_ca;
1642 imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_ca;
1643 imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca;
1644 imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca;
1645
1646 return imp;
1647 }
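/* Usage sketch (not part of this file): pixman is expected to chain this
 * implementation in front of a fallback once run-time CPU detection finds
 * AltiVec support.  The helper name pixman_have_vmx() is an assumption
 * about the surrounding CPU-detection code, shown only for illustration:
 *
 *     pixman_implementation_t *imp = _pixman_implementation_create_general ();
 *
 *     if (pixman_have_vmx ())
 *         imp = _pixman_implementation_create_vmx (imp);
 */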
