--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gfx/cairo/libpixman/src/pixman-vmx.c	Wed Dec 31 06:09:35 2014 +0100
@@ -0,0 +1,1647 @@
/*
 * Copyright © 2007 Luca Barbato
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Luca Barbato not be used in advertising or
 * publicity pertaining to distribution of the software without specific,
 * written prior permission.  Luca Barbato makes no representations about the
 * suitability of this software for any purpose.  It is provided "as is"
 * without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Author:  Luca Barbato (lu_zero@gentoo.org)
 *
 * Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell
 */

#include <config.h>
#include "pixman-private.h"
#include "pixman-combine32.h"
#include <altivec.h>

#define AVV(x...) {x}

static force_inline vector unsigned int
splat_alpha (vector unsigned int pix)
{
    return vec_perm (pix, pix,
		     (vector unsigned char)AVV (
			 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04,
			 0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C));
}
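/*
 * splat_alpha above broadcasts each pixel's alpha byte (byte 0 of each
 * big-endian 32-bit ARGB word, hence indices 0, 4, 8, 12) across all
 * four channel positions of that pixel.
 *
 * pix_multiply below is a per-channel 8-bit multiply.  Each 16-bit
 * product t = p * a is reduced back to 8 bits with the usual exact
 * divide-by-255 rounding trick:
 *
 *     (t + 0x80 + ((t + 0x80) >> 8)) >> 8  ==  (t + 127.5) / 255
 *
 * for all t in [0, 255 * 255].  The high and low halves of the pixels
 * are widened to 16 bits with vec_merge{h,l}, processed separately,
 * and packed back together with vec_packsu.
 */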
static force_inline vector unsigned int
pix_multiply (vector unsigned int p, vector unsigned int a)
{
    vector unsigned short hi, lo, mod;

    /* unpack to short */
    hi = (vector unsigned short)
	vec_mergeh ((vector unsigned char)AVV (0),
		    (vector unsigned char)p);

    mod = (vector unsigned short)
	vec_mergeh ((vector unsigned char)AVV (0),
		    (vector unsigned char)a);

    hi = vec_mladd (hi, mod, (vector unsigned short)
                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
                         0x0080, 0x0080, 0x0080, 0x0080));

    hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));

    hi = vec_sr (hi, vec_splat_u16 (8));

    /* unpack to short */
    lo = (vector unsigned short)
	vec_mergel ((vector unsigned char)AVV (0),
		    (vector unsigned char)p);
    mod = (vector unsigned short)
	vec_mergel ((vector unsigned char)AVV (0),
		    (vector unsigned char)a);

    lo = vec_mladd (lo, mod, (vector unsigned short)
                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
                         0x0080, 0x0080, 0x0080, 0x0080));

    lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));

    lo = vec_sr (lo, vec_splat_u16 (8));

    return (vector unsigned int)vec_packsu (hi, lo);
}

static force_inline vector unsigned int
pix_add (vector unsigned int a, vector unsigned int b)
{
    return (vector unsigned int)vec_adds ((vector unsigned char)a,
                                          (vector unsigned char)b);
}

static force_inline vector unsigned int
pix_add_mul (vector unsigned int x,
             vector unsigned int a,
             vector unsigned int y,
             vector unsigned int b)
{
    vector unsigned int t1, t2;

    t1 = pix_multiply (x, a);
    t2 = pix_multiply (y, b);

    return pix_add (t1, t2);
}

static force_inline vector unsigned int
negate (vector unsigned int src)
{
    return vec_nor (src, src);
}

/* dest*~srca + src */
static force_inline vector unsigned int
over (vector unsigned int src,
      vector unsigned int srca,
      vector unsigned int dest)
{
    vector unsigned char tmp = (vector unsigned char)
	pix_multiply (dest, negate (srca));

    tmp = vec_adds ((vector unsigned char)src, tmp);
    return (vector unsigned int)tmp;
}

/* in == pix_multiply */
#define in_over(src, srca, mask, dest)			\
    over (pix_multiply (src, mask),			\
          pix_multiply (srca, mask), dest)
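/*
 * AltiVec loads and stores are always 16-byte aligned, so unaligned
 * pixel runs are handled with the classic vec_lvsl / vec_lvsr permute
 * scheme: the COMPUTE_SHIFT_MASK* macros precompute the permute
 * vectors for each pointer, LOAD_VECTORS* read two adjacent quadwords
 * and vec_perm the wanted 16 bytes out of them, and STORE_VECTOR
 * merges the result back into the destination quadwords so that the
 * bytes outside the four-pixel run are written back unchanged.
 */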
#define COMPUTE_SHIFT_MASK(source)			\
    source ## _mask = vec_lvsl (0, source);

#define COMPUTE_SHIFT_MASKS(dest, source)		\
    dest ## _mask = vec_lvsl (0, dest);			\
    source ## _mask = vec_lvsl (0, source);		\
    store_mask = vec_lvsr (0, dest);

#define COMPUTE_SHIFT_MASKC(dest, source, mask)		\
    mask ## _mask = vec_lvsl (0, mask);			\
    dest ## _mask = vec_lvsl (0, dest);			\
    source ## _mask = vec_lvsl (0, source);		\
    store_mask = vec_lvsr (0, dest);

/* notice you have to declare temp vars...
 * Note: tmp3 and tmp4 must remain untouched!
 */

#define LOAD_VECTORS(dest, source)			\
    tmp1 = (typeof(tmp1))vec_ld (0, source);		\
    tmp2 = (typeof(tmp2))vec_ld (15, source);		\
    tmp3 = (typeof(tmp3))vec_ld (0, dest);		\
    v ## source = (typeof(v ## source))			\
	vec_perm (tmp1, tmp2, source ## _mask);		\
    tmp4 = (typeof(tmp4))vec_ld (15, dest);		\
    v ## dest = (typeof(v ## dest))			\
	vec_perm (tmp3, tmp4, dest ## _mask);

#define LOAD_VECTORSC(dest, source, mask)		\
    tmp1 = (typeof(tmp1))vec_ld (0, source);		\
    tmp2 = (typeof(tmp2))vec_ld (15, source);		\
    tmp3 = (typeof(tmp3))vec_ld (0, dest);		\
    v ## source = (typeof(v ## source))			\
	vec_perm (tmp1, tmp2, source ## _mask);		\
    tmp4 = (typeof(tmp4))vec_ld (15, dest);		\
    tmp1 = (typeof(tmp1))vec_ld (0, mask);		\
    v ## dest = (typeof(v ## dest))			\
	vec_perm (tmp3, tmp4, dest ## _mask);		\
    tmp2 = (typeof(tmp2))vec_ld (15, mask);		\
    v ## mask = (typeof(v ## mask))			\
	vec_perm (tmp1, tmp2, mask ## _mask);

#define LOAD_VECTORSM(dest, source, mask)		\
    LOAD_VECTORSC (dest, source, mask)			\
    v ## source = pix_multiply (v ## source,		\
                                splat_alpha (v ## mask));

#define STORE_VECTOR(dest)						  \
    edges = vec_perm (tmp4, tmp3, dest ## _mask);			  \
    tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \
    tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \
    vec_st ((vector unsigned int) tmp3, 15, dest);			  \
    vec_st ((vector unsigned int) tmp1, 0, dest);
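/*
 * Every combiner below follows the same pattern: the main loop handles
 * four pixels per iteration with the VMX helpers above, and a scalar
 * tail loop finishes the remaining width % 4 pixels with pixman's
 * UN8x4_* macros.  The *_mask variants first multiply the source by
 * the mask's splatted alpha, via LOAD_VECTORSM.
 *
 * OVER: dest = src + dest * (1 - src.alpha)
 */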
static void
vmx_combine_over_u_no_mask (uint32_t *      dest,
                            const uint32_t *src,
                            int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {

	LOAD_VECTORS (dest, src);

	vdest = over (vsrc, splat_alpha (vsrc), vdest);

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t s = src[i];
	uint32_t d = dest[i];
	uint32_t ia = ALPHA_8 (~s);

	UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);

	dest[i] = d;
    }
}

static void
vmx_combine_over_u_mask (uint32_t *      dest,
                         const uint32_t *src,
                         const uint32_t *mask,
                         int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORSM (dest, src, mask);

	vdest = over (vsrc, splat_alpha (vsrc), vdest);

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
	mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t m = ALPHA_8 (mask[i]);
	uint32_t s = src[i];
	uint32_t d = dest[i];
	uint32_t ia;

	UN8x4_MUL_UN8 (s, m);

	ia = ALPHA_8 (~s);

	UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
	dest[i] = d;
    }
}

static void
vmx_combine_over_u (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dest,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    if (mask)
	vmx_combine_over_u_mask (dest, src, mask, width);
    else
	vmx_combine_over_u_no_mask (dest, src, width);
}
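/* OVER_REVERSE: dest = dest + src * (1 - dest.alpha) */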
static void
vmx_combine_over_reverse_u_no_mask (uint32_t *      dest,
                                    const uint32_t *src,
                                    int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {

	LOAD_VECTORS (dest, src);

	vdest = over (vdest, splat_alpha (vdest), vsrc);

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t s = src[i];
	uint32_t d = dest[i];
	uint32_t ia = ALPHA_8 (~dest[i]);

	UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
	dest[i] = s;
    }
}

static void
vmx_combine_over_reverse_u_mask (uint32_t *      dest,
                                 const uint32_t *src,
                                 const uint32_t *mask,
                                 int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {

	LOAD_VECTORSM (dest, src, mask);

	vdest = over (vdest, splat_alpha (vdest), vsrc);

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
	mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t m = ALPHA_8 (mask[i]);
	uint32_t s = src[i];
	uint32_t d = dest[i];
	uint32_t ia = ALPHA_8 (~dest[i]);

	UN8x4_MUL_UN8 (s, m);

	UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
	dest[i] = s;
    }
}

static void
vmx_combine_over_reverse_u (pixman_implementation_t *imp,
                            pixman_op_t              op,
                            uint32_t *               dest,
                            const uint32_t *         src,
                            const uint32_t *         mask,
                            int                      width)
{
    if (mask)
	vmx_combine_over_reverse_u_mask (dest, src, mask, width);
    else
	vmx_combine_over_reverse_u_no_mask (dest, src, width);
}
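/* IN: dest = src * dest.alpha */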
static void
vmx_combine_in_u_no_mask (uint32_t *      dest,
                          const uint32_t *src,
                          int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORS (dest, src);

	vdest = pix_multiply (vsrc, splat_alpha (vdest));

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t s = src[i];
	uint32_t a = ALPHA_8 (dest[i]);

	UN8x4_MUL_UN8 (s, a);
	dest[i] = s;
    }
}

static void
vmx_combine_in_u_mask (uint32_t *      dest,
                       const uint32_t *src,
                       const uint32_t *mask,
                       int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORSM (dest, src, mask);

	vdest = pix_multiply (vsrc, splat_alpha (vdest));

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
	mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t m = ALPHA_8 (mask[i]);
	uint32_t s = src[i];
	uint32_t a = ALPHA_8 (dest[i]);

	UN8x4_MUL_UN8 (s, m);
	UN8x4_MUL_UN8 (s, a);

	dest[i] = s;
    }
}

static void
vmx_combine_in_u (pixman_implementation_t *imp,
                  pixman_op_t              op,
                  uint32_t *               dest,
                  const uint32_t *         src,
                  const uint32_t *         mask,
                  int                      width)
{
    if (mask)
	vmx_combine_in_u_mask (dest, src, mask, width);
    else
	vmx_combine_in_u_no_mask (dest, src, width);
}
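/* IN_REVERSE: dest = dest * src.alpha */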
static void
vmx_combine_in_reverse_u_no_mask (uint32_t *      dest,
                                  const uint32_t *src,
                                  int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORS (dest, src);

	vdest = pix_multiply (vdest, splat_alpha (vsrc));

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t d = dest[i];
	uint32_t a = ALPHA_8 (src[i]);

	UN8x4_MUL_UN8 (d, a);

	dest[i] = d;
    }
}

static void
vmx_combine_in_reverse_u_mask (uint32_t *      dest,
                               const uint32_t *src,
                               const uint32_t *mask,
                               int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORSM (dest, src, mask);

	vdest = pix_multiply (vdest, splat_alpha (vsrc));

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
	mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t m = ALPHA_8 (mask[i]);
	uint32_t d = dest[i];
	uint32_t a = src[i];

	UN8x4_MUL_UN8 (a, m);
	a = ALPHA_8 (a);
	UN8x4_MUL_UN8 (d, a);

	dest[i] = d;
    }
}

static void
vmx_combine_in_reverse_u (pixman_implementation_t *imp,
                          pixman_op_t              op,
                          uint32_t *               dest,
                          const uint32_t *         src,
                          const uint32_t *         mask,
                          int                      width)
{
    if (mask)
	vmx_combine_in_reverse_u_mask (dest, src, mask, width);
    else
	vmx_combine_in_reverse_u_no_mask (dest, src, width);
}
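/* OUT: dest = src * (1 - dest.alpha) */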
static void
vmx_combine_out_u_no_mask (uint32_t *      dest,
                           const uint32_t *src,
                           int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORS (dest, src);

	vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t s = src[i];
	uint32_t a = ALPHA_8 (~dest[i]);

	UN8x4_MUL_UN8 (s, a);

	dest[i] = s;
    }
}

static void
vmx_combine_out_u_mask (uint32_t *      dest,
                        const uint32_t *src,
                        const uint32_t *mask,
                        int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORSM (dest, src, mask);

	vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
	mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t m = ALPHA_8 (mask[i]);
	uint32_t s = src[i];
	uint32_t a = ALPHA_8 (~dest[i]);

	UN8x4_MUL_UN8 (s, m);
	UN8x4_MUL_UN8 (s, a);

	dest[i] = s;
    }
}

static void
vmx_combine_out_u (pixman_implementation_t *imp,
                   pixman_op_t              op,
                   uint32_t *               dest,
                   const uint32_t *         src,
                   const uint32_t *         mask,
                   int                      width)
{
    if (mask)
	vmx_combine_out_u_mask (dest, src, mask, width);
    else
	vmx_combine_out_u_no_mask (dest, src, width);
}
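/* OUT_REVERSE: dest = dest * (1 - src.alpha) */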
static void
vmx_combine_out_reverse_u_no_mask (uint32_t *      dest,
                                   const uint32_t *src,
                                   int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {

	LOAD_VECTORS (dest, src);

	vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t d = dest[i];
	uint32_t a = ALPHA_8 (~src[i]);

	UN8x4_MUL_UN8 (d, a);

	dest[i] = d;
    }
}

static void
vmx_combine_out_reverse_u_mask (uint32_t *      dest,
                                const uint32_t *src,
                                const uint32_t *mask,
                                int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORSM (dest, src, mask);

	vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
	mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t m = ALPHA_8 (mask[i]);
	uint32_t d = dest[i];
	uint32_t a = src[i];

	UN8x4_MUL_UN8 (a, m);
	a = ALPHA_8 (~a);
	UN8x4_MUL_UN8 (d, a);

	dest[i] = d;
    }
}

static void
vmx_combine_out_reverse_u (pixman_implementation_t *imp,
                           pixman_op_t              op,
                           uint32_t *               dest,
                           const uint32_t *         src,
                           const uint32_t *         mask,
                           int                      width)
{
    if (mask)
	vmx_combine_out_reverse_u_mask (dest, src, mask, width);
    else
	vmx_combine_out_reverse_u_no_mask (dest, src, width);
}
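/* ATOP: dest = src * dest.alpha + dest * (1 - src.alpha) */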
static void
vmx_combine_atop_u_no_mask (uint32_t *      dest,
                            const uint32_t *src,
                            int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORS (dest, src);

	vdest = pix_add_mul (vsrc, splat_alpha (vdest),
	                     vdest, splat_alpha (negate (vsrc)));

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t s = src[i];
	uint32_t d = dest[i];
	uint32_t dest_a = ALPHA_8 (d);
	uint32_t src_ia = ALPHA_8 (~s);

	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);

	dest[i] = s;
    }
}

static void
vmx_combine_atop_u_mask (uint32_t *      dest,
                         const uint32_t *src,
                         const uint32_t *mask,
                         int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORSM (dest, src, mask);

	vdest = pix_add_mul (vsrc, splat_alpha (vdest),
	                     vdest, splat_alpha (negate (vsrc)));

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
	mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t m = ALPHA_8 (mask[i]);
	uint32_t s = src[i];
	uint32_t d = dest[i];
	uint32_t dest_a = ALPHA_8 (d);
	uint32_t src_ia;

	UN8x4_MUL_UN8 (s, m);

	src_ia = ALPHA_8 (~s);

	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);

	dest[i] = s;
    }
}

static void
vmx_combine_atop_u (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dest,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    if (mask)
	vmx_combine_atop_u_mask (dest, src, mask, width);
    else
	vmx_combine_atop_u_no_mask (dest, src, width);
}
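/* ATOP_REVERSE: dest = dest * src.alpha + src * (1 - dest.alpha) */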
static void
vmx_combine_atop_reverse_u_no_mask (uint32_t *      dest,
                                    const uint32_t *src,
                                    int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORS (dest, src);

	vdest = pix_add_mul (vdest, splat_alpha (vsrc),
	                     vsrc, splat_alpha (negate (vdest)));

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t s = src[i];
	uint32_t d = dest[i];
	uint32_t src_a = ALPHA_8 (s);
	uint32_t dest_ia = ALPHA_8 (~d);

	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);

	dest[i] = s;
    }
}

static void
vmx_combine_atop_reverse_u_mask (uint32_t *      dest,
                                 const uint32_t *src,
                                 const uint32_t *mask,
                                 int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORSM (dest, src, mask);

	vdest = pix_add_mul (vdest, splat_alpha (vsrc),
	                     vsrc, splat_alpha (negate (vdest)));

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
	mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t m = ALPHA_8 (mask[i]);
	uint32_t s = src[i];
	uint32_t d = dest[i];
	uint32_t src_a;
	uint32_t dest_ia = ALPHA_8 (~d);

	UN8x4_MUL_UN8 (s, m);

	src_a = ALPHA_8 (s);

	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);

	dest[i] = s;
    }
}

static void
vmx_combine_atop_reverse_u (pixman_implementation_t *imp,
                            pixman_op_t              op,
                            uint32_t *               dest,
                            const uint32_t *         src,
                            const uint32_t *         mask,
                            int                      width)
{
    if (mask)
	vmx_combine_atop_reverse_u_mask (dest, src, mask, width);
    else
	vmx_combine_atop_reverse_u_no_mask (dest, src, width);
}
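/* XOR: dest = src * (1 - dest.alpha) + dest * (1 - src.alpha) */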
static void
vmx_combine_xor_u_no_mask (uint32_t *      dest,
                           const uint32_t *src,
                           int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORS (dest, src);

	vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
	                     vdest, splat_alpha (negate (vsrc)));

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t s = src[i];
	uint32_t d = dest[i];
	uint32_t src_ia = ALPHA_8 (~s);
	uint32_t dest_ia = ALPHA_8 (~d);

	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);

	dest[i] = s;
    }
}

static void
vmx_combine_xor_u_mask (uint32_t *      dest,
                        const uint32_t *src,
                        const uint32_t *mask,
                        int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORSM (dest, src, mask);

	vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
	                     vdest, splat_alpha (negate (vsrc)));

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
	mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t m = ALPHA_8 (mask[i]);
	uint32_t s = src[i];
	uint32_t d = dest[i];
	uint32_t src_ia;
	uint32_t dest_ia = ALPHA_8 (~d);

	UN8x4_MUL_UN8 (s, m);

	src_ia = ALPHA_8 (~s);

	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);

	dest[i] = s;
    }
}

static void
vmx_combine_xor_u (pixman_implementation_t *imp,
                   pixman_op_t              op,
                   uint32_t *               dest,
                   const uint32_t *         src,
                   const uint32_t *         mask,
                   int                      width)
{
    if (mask)
	vmx_combine_xor_u_mask (dest, src, mask, width);
    else
	vmx_combine_xor_u_no_mask (dest, src, width);
}
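/* ADD: dest = saturate (src + dest) */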
static void
vmx_combine_add_u_no_mask (uint32_t *      dest,
                           const uint32_t *src,
                           int             width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);
    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORS (dest, src);

	vdest = pix_add (vsrc, vdest);

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t s = src[i];
	uint32_t d = dest[i];

	UN8x4_ADD_UN8x4 (d, s);

	dest[i] = d;
    }
}

static void
vmx_combine_add_u_mask (uint32_t *      dest,
                        const uint32_t *src,
                        const uint32_t *mask,
                        int             width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORSM (dest, src, mask);

	vdest = pix_add (vsrc, vdest);

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
	mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t m = ALPHA_8 (mask[i]);
	uint32_t s = src[i];
	uint32_t d = dest[i];

	UN8x4_MUL_UN8 (s, m);
	UN8x4_ADD_UN8x4 (d, s);

	dest[i] = d;
    }
}

static void
vmx_combine_add_u (pixman_implementation_t *imp,
                   pixman_op_t              op,
                   uint32_t *               dest,
                   const uint32_t *         src,
                   const uint32_t *         mask,
                   int                      width)
{
    if (mask)
	vmx_combine_add_u_mask (dest, src, mask, width);
    else
	vmx_combine_add_u_no_mask (dest, src, width);
}
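/*
 * Component-alpha combiners: here the mask carries a separate alpha
 * value per color channel, so it is applied with full per-channel
 * multiplies (pix_multiply on the vector side, UN8x4_MUL_UN8x4 in the
 * scalar tails) instead of a single splatted alpha, and the same
 * Porter-Duff formulas as above are evaluated channel-wise.
 */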
static void
vmx_combine_src_ca (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dest,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORSC (dest, src, mask);

	vdest = pix_multiply (vsrc, vmask);

	STORE_VECTOR (dest);

	mask += 4;
	src += 4;
	dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t a = mask[i];
	uint32_t s = src[i];

	UN8x4_MUL_UN8x4 (s, a);

	dest[i] = s;
    }
}

static void
vmx_combine_over_ca (pixman_implementation_t *imp,
                     pixman_op_t              op,
                     uint32_t *               dest,
                     const uint32_t *         src,
                     const uint32_t *         mask,
                     int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORSC (dest, src, mask);

	vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest);

	STORE_VECTOR (dest);

	mask += 4;
	src += 4;
	dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t a = mask[i];
	uint32_t s = src[i];
	uint32_t d = dest[i];
	uint32_t sa = ALPHA_8 (s);

	UN8x4_MUL_UN8x4 (s, a);
	UN8x4_MUL_UN8 (a, sa);
	UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s);

	dest[i] = d;
    }
}

static void
vmx_combine_over_reverse_ca (pixman_implementation_t *imp,
                             pixman_op_t              op,
                             uint32_t *               dest,
                             const uint32_t *         src,
                             const uint32_t *         mask,
                             int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORSC (dest, src, mask);

	vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask));

	STORE_VECTOR (dest);

	mask += 4;
	src += 4;
	dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t a = mask[i];
	uint32_t s = src[i];
	uint32_t d = dest[i];
	uint32_t ida = ALPHA_8 (~d);

	UN8x4_MUL_UN8x4 (s, a);
	UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d);

	dest[i] = s;
    }
}

static void
vmx_combine_in_ca (pixman_implementation_t *imp,
                   pixman_op_t              op,
                   uint32_t *               dest,
                   const uint32_t *         src,
                   const uint32_t *         mask,
                   int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORSC (dest, src, mask);

	vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest));

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
	mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t a = mask[i];
	uint32_t s = src[i];
	uint32_t da = ALPHA_8 (dest[i]);

	UN8x4_MUL_UN8x4 (s, a);
	UN8x4_MUL_UN8 (s, da);

	dest[i] = s;
    }
}

static void
vmx_combine_in_reverse_ca (pixman_implementation_t *imp,
                           pixman_op_t              op,
                           uint32_t *               dest,
                           const uint32_t *         src,
                           const uint32_t *         mask,
                           int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {

	LOAD_VECTORSC (dest, src, mask);

	vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc)));

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
	mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t a = mask[i];
	uint32_t d = dest[i];
	uint32_t sa = ALPHA_8 (src[i]);

	UN8x4_MUL_UN8 (a, sa);
	UN8x4_MUL_UN8x4 (d, a);

	dest[i] = d;
    }
}

static void
vmx_combine_out_ca (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dest,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORSC (dest, src, mask);

	vdest = pix_multiply (
	    pix_multiply (vsrc, vmask), splat_alpha (negate (vdest)));

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
	mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t a = mask[i];
	uint32_t s = src[i];
	uint32_t d = dest[i];
	uint32_t da = ALPHA_8 (~d);

	UN8x4_MUL_UN8x4 (s, a);
	UN8x4_MUL_UN8 (s, da);

	dest[i] = s;
    }
}

static void
vmx_combine_out_reverse_ca (pixman_implementation_t *imp,
                            pixman_op_t              op,
                            uint32_t *               dest,
                            const uint32_t *         src,
                            const uint32_t *         mask,
                            int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORSC (dest, src, mask);

	vdest = pix_multiply (
	    vdest, negate (pix_multiply (vmask, splat_alpha (vsrc))));

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
	mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t a = mask[i];
	uint32_t s = src[i];
	uint32_t d = dest[i];
	uint32_t sa = ALPHA_8 (s);

	UN8x4_MUL_UN8 (a, sa);
	UN8x4_MUL_UN8x4 (d, ~a);

	dest[i] = d;
    }
}

static void
vmx_combine_atop_ca (pixman_implementation_t *imp,
                     pixman_op_t              op,
                     uint32_t *               dest,
                     const uint32_t *         src,
                     const uint32_t *         mask,
                     int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask, vsrca;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORSC (dest, src, mask);

	vsrca = splat_alpha (vsrc);

	vsrc = pix_multiply (vsrc, vmask);
	vmask = pix_multiply (vmask, vsrca);

	vdest = pix_add_mul (vsrc, splat_alpha (vdest),
	                     negate (vmask), vdest);

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
	mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t a = mask[i];
	uint32_t s = src[i];
	uint32_t d = dest[i];
	uint32_t sa = ALPHA_8 (s);
	uint32_t da = ALPHA_8 (d);

	UN8x4_MUL_UN8x4 (s, a);
	UN8x4_MUL_UN8 (a, sa);
	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);

	dest[i] = d;
    }
}

static void
vmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
                             pixman_op_t              op,
                             uint32_t *               dest,
                             const uint32_t *         src,
                             const uint32_t *         mask,
                             int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORSC (dest, src, mask);

	vdest = pix_add_mul (vdest,
	                     pix_multiply (vmask, splat_alpha (vsrc)),
	                     pix_multiply (vsrc, vmask),
	                     negate (splat_alpha (vdest)));

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
	mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t a = mask[i];
	uint32_t s = src[i];
	uint32_t d = dest[i];
	uint32_t sa = ALPHA_8 (s);
	uint32_t da = ALPHA_8 (~d);

	UN8x4_MUL_UN8x4 (s, a);
	UN8x4_MUL_UN8 (a, sa);
	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da);

	dest[i] = d;
    }
}

static void
vmx_combine_xor_ca (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dest,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORSC (dest, src, mask);

	vdest = pix_add_mul (vdest,
	                     negate (pix_multiply (vmask, splat_alpha (vsrc))),
	                     pix_multiply (vsrc, vmask),
	                     negate (splat_alpha (vdest)));

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
	mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t a = mask[i];
	uint32_t s = src[i];
	uint32_t d = dest[i];
	uint32_t sa = ALPHA_8 (s);
	uint32_t da = ALPHA_8 (~d);

	UN8x4_MUL_UN8x4 (s, a);
	UN8x4_MUL_UN8 (a, sa);
	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);

	dest[i] = d;
    }
}

static void
vmx_combine_add_ca (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    uint32_t *               dest,
                    const uint32_t *         src,
                    const uint32_t *         mask,
                    int                      width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
	dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
	LOAD_VECTORSC (dest, src, mask);

	vdest = pix_add (pix_multiply (vsrc, vmask), vdest);

	STORE_VECTOR (dest);

	src += 4;
	dest += 4;
	mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
	uint32_t a = mask[i];
	uint32_t s = src[i];
	uint32_t d = dest[i];

	UN8x4_MUL_UN8x4 (s, a);
	UN8x4_ADD_UN8x4 (s, d);

	dest[i] = s;
    }
}

static const pixman_fast_path_t vmx_fast_paths[] =
{
    { PIXMAN_OP_NONE },
};

pixman_implementation_t *
_pixman_implementation_create_vmx (pixman_implementation_t *fallback)
{
    pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths);

    /* Set up function pointers */

    imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u;
    imp->combine_32[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_u;
    imp->combine_32[PIXMAN_OP_IN] = vmx_combine_in_u;
    imp->combine_32[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_u;
    imp->combine_32[PIXMAN_OP_OUT] = vmx_combine_out_u;
    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_u;
    imp->combine_32[PIXMAN_OP_ATOP] = vmx_combine_atop_u;
    imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_u;
    imp->combine_32[PIXMAN_OP_XOR] = vmx_combine_xor_u;

    imp->combine_32[PIXMAN_OP_ADD] = vmx_combine_add_u;

    imp->combine_32_ca[PIXMAN_OP_SRC] = vmx_combine_src_ca;
    imp->combine_32_ca[PIXMAN_OP_OVER] = vmx_combine_over_ca;
    imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_IN] = vmx_combine_in_ca;
    imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_OUT] = vmx_combine_out_ca;
    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_ATOP] = vmx_combine_atop_ca;
    imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca;
    imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca;

    return imp;
}