/*
 * Copyright © 2007 Luca Barbato
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Luca Barbato not be used in advertising or
 * publicity pertaining to distribution of the software without specific,
 * written prior permission. Luca Barbato makes no representations about the
 * suitability of this software for any purpose. It is provided "as is"
 * without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Author: Luca Barbato (lu_zero@gentoo.org)
 *
 * Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell
 */

#include <config.h>
#include "pixman-private.h"
#include "pixman-combine32.h"
#include <altivec.h>

#define AVV(x...) {x}

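/* Broadcast the alpha byte of each of the four ARGB pixels across all four
 * bytes of that pixel, i.e. 0xAARRGGBB -> 0xAAAAAAAA.  Byte 0 of each
 * 32-bit word is selected, which is the alpha channel on the big-endian
 * PowerPC targets this AltiVec code is built for.
 */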
|
static force_inline vector unsigned int
splat_alpha (vector unsigned int pix)
{
    return vec_perm (pix, pix,
                     (vector unsigned char)AVV (
                         0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04,
                         0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C));
}

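/* Per-channel multiply of two vectors of four ARGB pixels: each result byte
 * is (p * a) / 255 with rounding, computed as t = p * a + 0x80 followed by
 * (t + (t >> 8)) >> 8.  The bytes are unpacked to shorts (high and low
 * halves separately) so that the 16-bit intermediate products fit.
 */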
|
static force_inline vector unsigned int
pix_multiply (vector unsigned int p, vector unsigned int a)
{
    vector unsigned short hi, lo, mod;

    /* unpack to short */
    hi = (vector unsigned short)
        vec_mergeh ((vector unsigned char)AVV (0),
                    (vector unsigned char)p);

    mod = (vector unsigned short)
        vec_mergeh ((vector unsigned char)AVV (0),
                    (vector unsigned char)a);

    hi = vec_mladd (hi, mod, (vector unsigned short)
                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
                         0x0080, 0x0080, 0x0080, 0x0080));

    hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));

    hi = vec_sr (hi, vec_splat_u16 (8));

    /* unpack to short */
    lo = (vector unsigned short)
        vec_mergel ((vector unsigned char)AVV (0),
                    (vector unsigned char)p);
    mod = (vector unsigned short)
        vec_mergel ((vector unsigned char)AVV (0),
                    (vector unsigned char)a);

    lo = vec_mladd (lo, mod, (vector unsigned short)
                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
                         0x0080, 0x0080, 0x0080, 0x0080));

    lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));

    lo = vec_sr (lo, vec_splat_u16 (8));

    return (vector unsigned int)vec_packsu (hi, lo);
}

static force_inline vector unsigned int
pix_add (vector unsigned int a, vector unsigned int b)
{
    return (vector unsigned int)vec_adds ((vector unsigned char)a,
                                          (vector unsigned char)b);
}

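/* (x * a + y * b) / 255 per channel: two pix_multiply results combined with
 * a saturating add.
 */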
|
static force_inline vector unsigned int
pix_add_mul (vector unsigned int x,
             vector unsigned int a,
             vector unsigned int y,
             vector unsigned int b)
{
    vector unsigned int t1, t2;

    t1 = pix_multiply (x, a);
    t2 = pix_multiply (y, b);

    return pix_add (t1, t2);
}

static force_inline vector unsigned int
negate (vector unsigned int src)
{
    return vec_nor (src, src);
}

/* dest*~srca + src */
static force_inline vector unsigned int
over (vector unsigned int src,
      vector unsigned int srca,
      vector unsigned int dest)
{
    vector unsigned char tmp = (vector unsigned char)
        pix_multiply (dest, negate (srca));

    tmp = vec_adds ((vector unsigned char)src, tmp);
    return (vector unsigned int)tmp;
}

/* in == pix_multiply */
#define in_over(src, srca, mask, dest)                                  \
    over (pix_multiply (src, mask),                                     \
          pix_multiply (srca, mask), dest)


#define COMPUTE_SHIFT_MASK(source)                                      \
    source ## _mask = vec_lvsl (0, source);

#define COMPUTE_SHIFT_MASKS(dest, source)                               \
    dest ## _mask = vec_lvsl (0, dest);                                 \
    source ## _mask = vec_lvsl (0, source);                             \
    store_mask = vec_lvsr (0, dest);

#define COMPUTE_SHIFT_MASKC(dest, source, mask)                         \
    mask ## _mask = vec_lvsl (0, mask);                                 \
    dest ## _mask = vec_lvsl (0, dest);                                 \
    source ## _mask = vec_lvsl (0, source);                             \
    store_mask = vec_lvsr (0, dest);

/* Note that the caller has to declare the temporary variables
 * (tmp1..tmp4, edges, the *_mask and store_mask vectors).
 * tmp3 and tmp4 must remain untouched between the load and the store!
 */

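/* The LOAD_VECTORS* macros implement unaligned loads the AltiVec way:
 * vec_ld() fetches the two 16-byte blocks that straddle the (possibly
 * unaligned) pointer, and vec_perm() with the mask produced by vec_lvsl()
 * shifts the wanted 16 bytes into place.  tmp3/tmp4 keep the raw
 * destination blocks so that STORE_VECTOR can merge the edges back in.
 */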
|
#define LOAD_VECTORS(dest, source)                                      \
    tmp1 = (typeof(tmp1))vec_ld (0, source);                            \
    tmp2 = (typeof(tmp2))vec_ld (15, source);                           \
    tmp3 = (typeof(tmp3))vec_ld (0, dest);                              \
    v ## source = (typeof(v ## source))                                 \
        vec_perm (tmp1, tmp2, source ## _mask);                         \
    tmp4 = (typeof(tmp4))vec_ld (15, dest);                             \
    v ## dest = (typeof(v ## dest))                                     \
        vec_perm (tmp3, tmp4, dest ## _mask);

#define LOAD_VECTORSC(dest, source, mask)                               \
    tmp1 = (typeof(tmp1))vec_ld (0, source);                            \
    tmp2 = (typeof(tmp2))vec_ld (15, source);                           \
    tmp3 = (typeof(tmp3))vec_ld (0, dest);                              \
    v ## source = (typeof(v ## source))                                 \
        vec_perm (tmp1, tmp2, source ## _mask);                         \
    tmp4 = (typeof(tmp4))vec_ld (15, dest);                             \
    tmp1 = (typeof(tmp1))vec_ld (0, mask);                              \
    v ## dest = (typeof(v ## dest))                                     \
        vec_perm (tmp3, tmp4, dest ## _mask);                           \
    tmp2 = (typeof(tmp2))vec_ld (15, mask);                             \
    v ## mask = (typeof(v ## mask))                                     \
        vec_perm (tmp1, tmp2, mask ## _mask);

#define LOAD_VECTORSM(dest, source, mask)                               \
    LOAD_VECTORSC (dest, source, mask)                                  \
    v ## source = pix_multiply (v ## source,                            \
                                splat_alpha (v ## mask));

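/* STORE_VECTOR is the matching unaligned store: the bytes belonging to
 * neighbouring data (the edges outside the four pixels being written) are
 * reassembled from tmp3/tmp4 and merged with the result using the
 * vec_lvsr() store mask, then both touched 16-byte blocks are written back
 * with vec_st().
 */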
|
#define STORE_VECTOR(dest)                                              \
    edges = vec_perm (tmp4, tmp3, dest ## _mask);                       \
    tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \
    tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \
    vec_st ((vector unsigned int) tmp3, 15, dest);                      \
    vec_st ((vector unsigned int) tmp1, 0, dest);

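/* All the combine_* functions below follow the same pattern: process the
 * row four pixels at a time with the vector helpers above, then fall back
 * to the scalar UN8x4_* macros from pixman-combine32.h for the remaining
 * width % 4 pixels.
 */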
|
static void
vmx_combine_over_u_no_mask (uint32_t *      dest,
                            const uint32_t *src,
                            int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = over (vsrc, splat_alpha (vsrc), vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);

        dest[i] = d;
    }
}

static void
vmx_combine_over_u_mask (uint32_t *      dest,
                         const uint32_t *src,
                         const uint32_t *mask,
                         int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = over (vsrc, splat_alpha (vsrc), vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia;

        UN8x4_MUL_UN8 (s, m);

        ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
        dest[i] = d;
    }
}

static void
vmx_combine_over_u (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dest,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    if (mask)
        vmx_combine_over_u_mask (dest, src, mask, width);
    else
        vmx_combine_over_u_no_mask (dest, src, width);
}

|
static void
vmx_combine_over_reverse_u_no_mask (uint32_t *      dest,
                                    const uint32_t *src,
                                    int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = over (vdest, splat_alpha (vdest), vsrc);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
        dest[i] = s;
    }
}

static void
vmx_combine_over_reverse_u_mask (uint32_t *      dest,
                                 const uint32_t *src,
                                 const uint32_t *mask,
                                 int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = over (vdest, splat_alpha (vdest), vsrc);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8 (s, m);

        UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
        dest[i] = s;
    }
}

static void
vmx_combine_over_reverse_u (pixman_implementation_t *imp,
                            pixman_op_t              op,
                            uint32_t *               dest,
                            const uint32_t *         src,
                            const uint32_t *         mask,
                            int                      width)
{
    if (mask)
        vmx_combine_over_reverse_u_mask (dest, src, mask, width);
    else
        vmx_combine_over_reverse_u_no_mask (dest, src, width);
}

|
static void
vmx_combine_in_u_no_mask (uint32_t *      dest,
                          const uint32_t *src,
                          int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_multiply (vsrc, splat_alpha (vdest));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (dest[i]);

        UN8x4_MUL_UN8 (s, a);
        dest[i] = s;
    }
}

static void
vmx_combine_in_u_mask (uint32_t *      dest,
                       const uint32_t *src,
                       const uint32_t *mask,
                       int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_multiply (vsrc, splat_alpha (vdest));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (dest[i]);

        UN8x4_MUL_UN8 (s, m);
        UN8x4_MUL_UN8 (s, a);

        dest[i] = s;
    }
}

static void
vmx_combine_in_u (pixman_implementation_t *imp,
                  pixman_op_t              op,
                  uint32_t *               dest,
                  const uint32_t *         src,
                  const uint32_t *         mask,
                  int                      width)
{
    if (mask)
        vmx_combine_in_u_mask (dest, src, mask, width);
    else
        vmx_combine_in_u_no_mask (dest, src, width);
}

|
static void
vmx_combine_in_reverse_u_no_mask (uint32_t *      dest,
                                  const uint32_t *src,
                                  int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_multiply (vdest, splat_alpha (vsrc));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t d = dest[i];
        uint32_t a = ALPHA_8 (src[i]);

        UN8x4_MUL_UN8 (d, a);

        dest[i] = d;
    }
}

static void
vmx_combine_in_reverse_u_mask (uint32_t *      dest,
                               const uint32_t *src,
                               const uint32_t *mask,
                               int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_multiply (vdest, splat_alpha (vsrc));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t d = dest[i];
        uint32_t a = src[i];

        UN8x4_MUL_UN8 (a, m);
        a = ALPHA_8 (a);
        UN8x4_MUL_UN8 (d, a);

        dest[i] = d;
    }
}

static void
vmx_combine_in_reverse_u (pixman_implementation_t *imp,
                          pixman_op_t              op,
                          uint32_t *               dest,
                          const uint32_t *         src,
                          const uint32_t *         mask,
                          int                      width)
{
    if (mask)
        vmx_combine_in_reverse_u_mask (dest, src, mask, width);
    else
        vmx_combine_in_reverse_u_no_mask (dest, src, width);
}

|
static void
vmx_combine_out_u_no_mask (uint32_t *      dest,
                           const uint32_t *src,
                           int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8 (s, a);

        dest[i] = s;
    }
}

static void
vmx_combine_out_u_mask (uint32_t *      dest,
                        const uint32_t *src,
                        const uint32_t *mask,
                        int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8 (s, m);
        UN8x4_MUL_UN8 (s, a);

        dest[i] = s;
    }
}

static void
vmx_combine_out_u (pixman_implementation_t *imp,
                   pixman_op_t              op,
                   uint32_t *               dest,
                   const uint32_t *         src,
                   const uint32_t *         mask,
                   int                      width)
{
    if (mask)
        vmx_combine_out_u_mask (dest, src, mask, width);
    else
        vmx_combine_out_u_no_mask (dest, src, width);
}

|
static void
vmx_combine_out_reverse_u_no_mask (uint32_t *      dest,
                                   const uint32_t *src,
                                   int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t d = dest[i];
        uint32_t a = ALPHA_8 (~src[i]);

        UN8x4_MUL_UN8 (d, a);

        dest[i] = d;
    }
}

static void
vmx_combine_out_reverse_u_mask (uint32_t *      dest,
                                const uint32_t *src,
                                const uint32_t *mask,
                                int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t d = dest[i];
        uint32_t a = src[i];

        UN8x4_MUL_UN8 (a, m);
        a = ALPHA_8 (~a);
        UN8x4_MUL_UN8 (d, a);

        dest[i] = d;
    }
}

static void
vmx_combine_out_reverse_u (pixman_implementation_t *imp,
                           pixman_op_t              op,
                           uint32_t *               dest,
                           const uint32_t *         src,
                           const uint32_t *         mask,
                           int                      width)
{
    if (mask)
        vmx_combine_out_reverse_u_mask (dest, src, mask, width);
    else
        vmx_combine_out_reverse_u_no_mask (dest, src, width);
}

|
static void
vmx_combine_atop_u_no_mask (uint32_t *      dest,
                            const uint32_t *src,
                            int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
                             vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t dest_a = ALPHA_8 (d);
        uint32_t src_ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);

        dest[i] = s;
    }
}

static void
vmx_combine_atop_u_mask (uint32_t *      dest,
                         const uint32_t *src,
                         const uint32_t *mask,
                         int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
                             vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t dest_a = ALPHA_8 (d);
        uint32_t src_ia;

        UN8x4_MUL_UN8 (s, m);

        src_ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);

        dest[i] = s;
    }
}

static void
vmx_combine_atop_u (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dest,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    if (mask)
        vmx_combine_atop_u_mask (dest, src, mask, width);
    else
        vmx_combine_atop_u_no_mask (dest, src, width);
}

|
static void
vmx_combine_atop_reverse_u_no_mask (uint32_t *      dest,
                                    const uint32_t *src,
                                    int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_add_mul (vdest, splat_alpha (vsrc),
                             vsrc, splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_a = ALPHA_8 (s);
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);

        dest[i] = s;
    }
}

static void
vmx_combine_atop_reverse_u_mask (uint32_t *      dest,
                                 const uint32_t *src,
                                 const uint32_t *mask,
                                 int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_add_mul (vdest, splat_alpha (vsrc),
                             vsrc, splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_a;
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8 (s, m);

        src_a = ALPHA_8 (s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);

        dest[i] = s;
    }
}

static void
vmx_combine_atop_reverse_u (pixman_implementation_t *imp,
                            pixman_op_t              op,
                            uint32_t *               dest,
                            const uint32_t *         src,
                            const uint32_t *         mask,
                            int                      width)
{
    if (mask)
        vmx_combine_atop_reverse_u_mask (dest, src, mask, width);
    else
        vmx_combine_atop_reverse_u_no_mask (dest, src, width);
}

|
static void
vmx_combine_xor_u_no_mask (uint32_t *      dest,
                           const uint32_t *src,
                           int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
                             vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_ia = ALPHA_8 (~s);
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);

        dest[i] = s;
    }
}

static void
vmx_combine_xor_u_mask (uint32_t *      dest,
                        const uint32_t *src,
                        const uint32_t *mask,
                        int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
                             vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_ia;
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8 (s, m);

        src_ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);

        dest[i] = s;
    }
}

static void
vmx_combine_xor_u (pixman_implementation_t *imp,
                   pixman_op_t              op,
                   uint32_t *               dest,
                   const uint32_t *         src,
                   const uint32_t *         mask,
                   int                      width)
{
    if (mask)
        vmx_combine_xor_u_mask (dest, src, mask, width);
    else
        vmx_combine_xor_u_no_mask (dest, src, width);
}

|
static void
vmx_combine_add_u_no_mask (uint32_t *      dest,
                           const uint32_t *src,
                           int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);
    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_add (vsrc, vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];

        UN8x4_ADD_UN8x4 (d, s);

        dest[i] = d;
    }
}

static void
vmx_combine_add_u_mask (uint32_t *      dest,
                        const uint32_t *src,
                        const uint32_t *mask,
                        int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_add (vsrc, vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];

        UN8x4_MUL_UN8 (s, m);
        UN8x4_ADD_UN8x4 (d, s);

        dest[i] = d;
    }
}

static void
vmx_combine_add_u (pixman_implementation_t *imp,
                   pixman_op_t              op,
                   uint32_t *               dest,
                   const uint32_t *         src,
                   const uint32_t *         mask,
                   int                      width)
{
    if (mask)
        vmx_combine_add_u_mask (dest, src, mask, width);
    else
        vmx_combine_add_u_no_mask (dest, src, width);
}

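/* Component-alpha (_ca) variants: the mask supplies a separate 8-bit factor
 * per channel rather than a single alpha, so the mask vector is loaded with
 * LOAD_VECTORSC and multiplied in per channel instead of being collapsed to
 * its alpha byte.
 */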
|
static void
vmx_combine_src_ca (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dest,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (vsrc, vmask);

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];

        UN8x4_MUL_UN8x4 (s, a);

        dest[i] = s;
    }
}

|
static void
vmx_combine_over_ca (pixman_implementation_t *imp,
                     pixman_op_t              op,
                     uint32_t *               dest,
                     const uint32_t *         src,
                     const uint32_t *         mask,
                     int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest);

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s);

        dest[i] = d;
    }
}

|
static void
vmx_combine_over_reverse_ca (pixman_implementation_t *imp,
                             pixman_op_t              op,
                             uint32_t *               dest,
                             const uint32_t *         src,
                             const uint32_t *         mask,
                             int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask));

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ida = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d);

        dest[i] = s;
    }
}

|
static void
vmx_combine_in_ca (pixman_implementation_t *imp,
                   pixman_op_t              op,
                   uint32_t *               dest,
                   const uint32_t *         src,
                   const uint32_t *         mask,
                   int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t da = ALPHA_8 (dest[i]);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (s, da);

        dest[i] = s;
    }
}

|
static void
vmx_combine_in_reverse_ca (pixman_implementation_t *imp,
                           pixman_op_t              op,
                           uint32_t *               dest,
                           const uint32_t *         src,
                           const uint32_t *         mask,
                           int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (src[i]);

        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4 (d, a);

        dest[i] = d;
    }
}

|
static void
vmx_combine_out_ca (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dest,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (
            pix_multiply (vsrc, vmask), splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t da = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (s, da);

        dest[i] = s;
    }
}

|
static void
vmx_combine_out_reverse_ca (pixman_implementation_t *imp,
                            pixman_op_t              op,
                            uint32_t *               dest,
                            const uint32_t *         src,
                            const uint32_t *         mask,
                            int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (
            vdest, negate (pix_multiply (vmask, splat_alpha (vsrc))));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);

        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4 (d, ~a);

        dest[i] = d;
    }
}

|
static void
vmx_combine_atop_ca (pixman_implementation_t *imp,
                     pixman_op_t              op,
                     uint32_t *               dest,
                     const uint32_t *         src,
                     const uint32_t *         mask,
                     int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask, vsrca;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vsrca = splat_alpha (vsrc);

        vsrc = pix_multiply (vsrc, vmask);
        vmask = pix_multiply (vmask, vsrca);

        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
                             negate (vmask), vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);
        uint32_t da = ALPHA_8 (d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);

        dest[i] = d;
    }
}

|
static void
vmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
                             pixman_op_t              op,
                             uint32_t *               dest,
                             const uint32_t *         src,
                             const uint32_t *         mask,
                             int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_add_mul (vdest,
                             pix_multiply (vmask, splat_alpha (vsrc)),
                             pix_multiply (vsrc, vmask),
                             negate (splat_alpha (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);
        uint32_t da = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da);

        dest[i] = d;
    }
}

|
static void
vmx_combine_xor_ca (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dest,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_add_mul (vdest,
                             negate (pix_multiply (vmask, splat_alpha (vsrc))),
                             pix_multiply (vsrc, vmask),
                             negate (splat_alpha (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);
        uint32_t da = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);

        dest[i] = d;
    }
}

|
static void
vmx_combine_add_ca (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dest,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_add (pix_multiply (vsrc, vmask), vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_ADD_UN8x4 (s, d);

        dest[i] = s;
    }
}

|
static const pixman_fast_path_t vmx_fast_paths[] =
{
    { PIXMAN_OP_NONE },
};

pixman_implementation_t *
_pixman_implementation_create_vmx (pixman_implementation_t *fallback)
{
    pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths);

    /* Set up function pointers */

    imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u;
    imp->combine_32[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_u;
    imp->combine_32[PIXMAN_OP_IN] = vmx_combine_in_u;
    imp->combine_32[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_u;
    imp->combine_32[PIXMAN_OP_OUT] = vmx_combine_out_u;
    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_u;
    imp->combine_32[PIXMAN_OP_ATOP] = vmx_combine_atop_u;
    imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_u;
    imp->combine_32[PIXMAN_OP_XOR] = vmx_combine_xor_u;

    imp->combine_32[PIXMAN_OP_ADD] = vmx_combine_add_u;

    imp->combine_32_ca[PIXMAN_OP_SRC] = vmx_combine_src_ca;
    imp->combine_32_ca[PIXMAN_OP_OVER] = vmx_combine_over_ca;
    imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_IN] = vmx_combine_in_ca;
    imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_OUT] = vmx_combine_out_ca;
    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_ATOP] = vmx_combine_atop_ca;
    imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca;
    imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca;

    return imp;
}