/*
 * Copyright © 2007 Luca Barbato
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Luca Barbato not be used in advertising or
 * publicity pertaining to distribution of the software without specific,
 * written prior permission. Luca Barbato makes no representations about the
 * suitability of this software for any purpose. It is provided "as is"
 * without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Author: Luca Barbato (lu_zero@gentoo.org)
 *
 * Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell
 */

#include <config.h>
#include "pixman-private.h"
#include "pixman-combine32.h"
#include <altivec.h>

#define AVV(x...) {x}

/* Broadcast the alpha byte of each 32-bit ARGB pixel to all four bytes
 * of that pixel. */
static force_inline vector unsigned int
splat_alpha (vector unsigned int pix)
{
    return vec_perm (pix, pix,
                     (vector unsigned char)AVV (
                         0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04,
                         0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C));
}

/* Per-byte multiply p * a / 255 with round-to-nearest, done in two
 * 16-bit halves because the intermediate products need 16 bits. */
static force_inline vector unsigned int
pix_multiply (vector unsigned int p, vector unsigned int a)
{
    vector unsigned short hi, lo, mod;

    /* unpack the high eight bytes to short */
    hi = (vector unsigned short)
        vec_mergeh ((vector unsigned char)AVV (0),
                    (vector unsigned char)p);

    mod = (vector unsigned short)
        vec_mergeh ((vector unsigned char)AVV (0),
                    (vector unsigned char)a);

    /* hi = p * a + 0x80; the 0x80 bias makes the shifts below round */
    hi = vec_mladd (hi, mod, (vector unsigned short)
                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
                         0x0080, 0x0080, 0x0080, 0x0080));

    /* (t + (t >> 8)) >> 8 approximates t / 255 */
    hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));

    hi = vec_sr (hi, vec_splat_u16 (8));

    /* unpack the low eight bytes to short */
    lo = (vector unsigned short)
        vec_mergel ((vector unsigned char)AVV (0),
                    (vector unsigned char)p);
    mod = (vector unsigned short)
        vec_mergel ((vector unsigned char)AVV (0),
                    (vector unsigned char)a);

    lo = vec_mladd (lo, mod, (vector unsigned short)
                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
                         0x0080, 0x0080, 0x0080, 0x0080));

    lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));

    lo = vec_sr (lo, vec_splat_u16 (8));

    return (vector unsigned int)vec_packsu (hi, lo);
}

static force_inline vector unsigned int
pix_add (vector unsigned int a, vector unsigned int b)
{
    return (vector unsigned int)vec_adds ((vector unsigned char)a,
                                          (vector unsigned char)b);
}

static force_inline vector unsigned int
pix_add_mul (vector unsigned int x,
             vector unsigned int a,
             vector unsigned int y,
             vector unsigned int b)
{
    vector unsigned int t1, t2;

    t1 = pix_multiply (x, a);
    t2 = pix_multiply (y, b);

    return pix_add (t1, t2);
}

static force_inline vector unsigned int
negate (vector unsigned int src)
{
    return vec_nor (src, src);
}

/* dest*~srca + src */
static force_inline vector unsigned int
over (vector unsigned int src,
      vector unsigned int srca,
      vector unsigned int dest)
{
    vector unsigned char tmp = (vector unsigned char)
        pix_multiply (dest, negate (srca));

    tmp = vec_adds ((vector unsigned char)src, tmp);
    return (vector unsigned int)tmp;
}

/* in == pix_multiply */
#define in_over(src, srca, mask, dest)              \
    over (pix_multiply (src, mask),                 \
          pix_multiply (srca, mask), dest)

#define COMPUTE_SHIFT_MASK(source)                  \
    source ## _mask = vec_lvsl (0, source);

#define COMPUTE_SHIFT_MASKS(dest, source)           \
    dest ## _mask = vec_lvsl (0, dest);             \
    source ## _mask = vec_lvsl (0, source);         \
    store_mask = vec_lvsr (0, dest);

#define COMPUTE_SHIFT_MASKC(dest, source, mask)     \
    mask ## _mask = vec_lvsl (0, mask);             \
    dest ## _mask = vec_lvsl (0, dest);             \
    source ## _mask = vec_lvsl (0, source);         \
    store_mask = vec_lvsr (0, dest);
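/*
 * For reference: the LOAD_VECTORS and STORE_VECTOR machinery below is the
 * classic AltiVec misaligned-access idiom. vec_lvsl/vec_lvsr turn an
 * address into a permute mask, and vec_perm applied to the two 16-byte
 * blocks straddling an unaligned pointer reassembles the intended data.
 * A minimal stand-alone sketch of the load half (illustrative only; this
 * helper is not part of the file and is not compiled):
 */
#if 0
static vector unsigned int
load_unaligned (const uint32_t *p)
{
    vector unsigned char shift = vec_lvsl (0, p); /* alignment permute mask */
    vector unsigned int  lo    = vec_ld (0, p);   /* block containing p[0]  */
    vector unsigned int  hi    = vec_ld (15, p);  /* following 16-byte block */

    return vec_perm (lo, hi, shift);
}
#endif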
/*
 * The macros below expect the caller to declare the temporary vectors
 * (tmp1-tmp4, edges, the per-pointer *_mask vectors and store_mask).
 * Note: tmp3 and tmp4 must remain untouched between LOAD_VECTORS* and
 * STORE_VECTOR!
 */

#define LOAD_VECTORS(dest, source)                        \
    tmp1 = (typeof(tmp1))vec_ld (0, source);              \
    tmp2 = (typeof(tmp2))vec_ld (15, source);             \
    tmp3 = (typeof(tmp3))vec_ld (0, dest);                \
    v ## source = (typeof(v ## source))                   \
        vec_perm (tmp1, tmp2, source ## _mask);           \
    tmp4 = (typeof(tmp4))vec_ld (15, dest);               \
    v ## dest = (typeof(v ## dest))                       \
        vec_perm (tmp3, tmp4, dest ## _mask);

#define LOAD_VECTORSC(dest, source, mask)                 \
    tmp1 = (typeof(tmp1))vec_ld (0, source);              \
    tmp2 = (typeof(tmp2))vec_ld (15, source);             \
    tmp3 = (typeof(tmp3))vec_ld (0, dest);                \
    v ## source = (typeof(v ## source))                   \
        vec_perm (tmp1, tmp2, source ## _mask);           \
    tmp4 = (typeof(tmp4))vec_ld (15, dest);               \
    tmp1 = (typeof(tmp1))vec_ld (0, mask);                \
    v ## dest = (typeof(v ## dest))                       \
        vec_perm (tmp3, tmp4, dest ## _mask);             \
    tmp2 = (typeof(tmp2))vec_ld (15, mask);               \
    v ## mask = (typeof(v ## mask))                       \
        vec_perm (tmp1, tmp2, mask ## _mask);

/* Like LOAD_VECTORSC, but also scales the source by the mask's alpha. */
#define LOAD_VECTORSM(dest, source, mask)                 \
    LOAD_VECTORSC (dest, source, mask)                    \
    v ## source = pix_multiply (v ## source,              \
                                splat_alpha (v ## mask));

#define STORE_VECTOR(dest)                                \
    edges = vec_perm (tmp4, tmp3, dest ## _mask);         \
    tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \
    tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \
    vec_st ((vector unsigned int) tmp3, 15, dest);        \
    vec_st ((vector unsigned int) tmp1, 0, dest);

static void
vmx_combine_over_u_no_mask (uint32_t * dest,
                            const uint32_t *src,
                            int width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = over (vsrc, splat_alpha (vsrc), vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);

        dest[i] = d;
    }
}

static void
vmx_combine_over_u_mask (uint32_t * dest,
                         const uint32_t *src,
                         const uint32_t *mask,
                         int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = over (vsrc, splat_alpha (vsrc), vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia;

        UN8x4_MUL_UN8 (s, m);

        ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
        dest[i] = d;
    }
}

static void
vmx_combine_over_u (pixman_implementation_t *imp,
                    pixman_op_t op,
                    uint32_t * dest,
                    const uint32_t * src,
                    const uint32_t * mask,
                    int width)
{
    if (mask)
        vmx_combine_over_u_mask (dest, src, mask, width);
    else
        vmx_combine_over_u_no_mask (dest, src, width);
}

static void
vmx_combine_over_reverse_u_no_mask (uint32_t * dest,
                                    const uint32_t *src,
                                    int width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = over (vdest, splat_alpha (vdest), vsrc);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
        dest[i] = s;
    }
}

static void
vmx_combine_over_reverse_u_mask (uint32_t * dest,
                                 const uint32_t *src,
                                 const uint32_t *mask,
                                 int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = over (vdest, splat_alpha (vdest), vsrc);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ia = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8 (s, m);

        UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
        dest[i] = s;
    }
}

static void
vmx_combine_over_reverse_u (pixman_implementation_t *imp,
                            pixman_op_t op,
                            uint32_t * dest,
                            const uint32_t * src,
                            const uint32_t * mask,
                            int width)
{
    if (mask)
        vmx_combine_over_reverse_u_mask (dest, src, mask, width);
    else
        vmx_combine_over_reverse_u_no_mask (dest, src, width);
}
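/*
 * Scalar reference for OVER (illustrative only; not compiled): each pixel
 * becomes src + dest * (255 - src_alpha) / 255 with a saturating add,
 * matching over (vsrc, splat_alpha (vsrc), vdest) in the vector path and
 * the UN8x4_MUL_UN8_ADD_UN8x4 tail loops above.
 */
#if 0
static uint32_t
scalar_over (uint32_t src, uint32_t dest)
{
    uint32_t ia = ALPHA_8 (~src);  /* inverse source alpha */

    UN8x4_MUL_UN8_ADD_UN8x4 (dest, ia, src);
    return dest;
}
#endif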
static void
vmx_combine_in_u_no_mask (uint32_t * dest,
                          const uint32_t *src,
                          int width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_multiply (vsrc, splat_alpha (vdest));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (dest[i]);

        UN8x4_MUL_UN8 (s, a);
        dest[i] = s;
    }
}

static void
vmx_combine_in_u_mask (uint32_t * dest,
                       const uint32_t *src,
                       const uint32_t *mask,
                       int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_multiply (vsrc, splat_alpha (vdest));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (dest[i]);

        UN8x4_MUL_UN8 (s, m);
        UN8x4_MUL_UN8 (s, a);

        dest[i] = s;
    }
}

static void
vmx_combine_in_u (pixman_implementation_t *imp,
                  pixman_op_t op,
                  uint32_t * dest,
                  const uint32_t * src,
                  const uint32_t * mask,
                  int width)
{
    if (mask)
        vmx_combine_in_u_mask (dest, src, mask, width);
    else
        vmx_combine_in_u_no_mask (dest, src, width);
}

static void
vmx_combine_in_reverse_u_no_mask (uint32_t * dest,
                                  const uint32_t *src,
                                  int width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_multiply (vdest, splat_alpha (vsrc));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t d = dest[i];
        uint32_t a = ALPHA_8 (src[i]);

        UN8x4_MUL_UN8 (d, a);

        dest[i] = d;
    }
}
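/*
 * Scalar reference for the IN family (illustrative only; not compiled):
 * IN scales the source by the destination alpha, and IN_REVERSE scales
 * the destination by the source alpha, which is exactly what the
 * UN8x4_MUL_UN8 tail loops compute.
 */
#if 0
static uint32_t
scalar_in (uint32_t src, uint32_t dest)
{
    uint32_t da = ALPHA_8 (dest);

    UN8x4_MUL_UN8 (src, da);       /* per channel: src = src * da / 255 */
    return src;
}
#endif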
static void
vmx_combine_in_reverse_u_mask (uint32_t * dest,
                               const uint32_t *src,
                               const uint32_t *mask,
                               int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_multiply (vdest, splat_alpha (vsrc));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t d = dest[i];
        uint32_t a = src[i];

        UN8x4_MUL_UN8 (a, m);
        a = ALPHA_8 (a);
        UN8x4_MUL_UN8 (d, a);

        dest[i] = d;
    }
}

static void
vmx_combine_in_reverse_u (pixman_implementation_t *imp,
                          pixman_op_t op,
                          uint32_t * dest,
                          const uint32_t * src,
                          const uint32_t * mask,
                          int width)
{
    if (mask)
        vmx_combine_in_reverse_u_mask (dest, src, mask, width);
    else
        vmx_combine_in_reverse_u_no_mask (dest, src, width);
}

static void
vmx_combine_out_u_no_mask (uint32_t * dest,
                           const uint32_t *src,
                           int width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8 (s, a);

        dest[i] = s;
    }
}

static void
vmx_combine_out_u_mask (uint32_t * dest,
                        const uint32_t *src,
                        const uint32_t *mask,
                        int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t a = ALPHA_8 (~dest[i]);

        UN8x4_MUL_UN8 (s, m);
        UN8x4_MUL_UN8 (s, a);

        dest[i] = s;
    }
}

static void
vmx_combine_out_u (pixman_implementation_t *imp,
                   pixman_op_t op,
                   uint32_t * dest,
                   const uint32_t * src,
                   const uint32_t * mask,
                   int width)
{
    if (mask)
        vmx_combine_out_u_mask (dest, src, mask, width);
    else
        vmx_combine_out_u_no_mask (dest, src, width);
}

static void
vmx_combine_out_reverse_u_no_mask (uint32_t * dest,
                                   const uint32_t *src,
                                   int width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t d = dest[i];
        uint32_t a = ALPHA_8 (~src[i]);

        UN8x4_MUL_UN8 (d, a);

        dest[i] = d;
    }
}

static void
vmx_combine_out_reverse_u_mask (uint32_t * dest,
                                const uint32_t *src,
                                const uint32_t *mask,
                                int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t d = dest[i];
        uint32_t a = src[i];

        UN8x4_MUL_UN8 (a, m);
        a = ALPHA_8 (~a);
        UN8x4_MUL_UN8 (d, a);

        dest[i] = d;
    }
}

static void
vmx_combine_out_reverse_u (pixman_implementation_t *imp,
                           pixman_op_t op,
                           uint32_t * dest,
                           const uint32_t * src,
                           const uint32_t * mask,
                           int width)
{
    if (mask)
        vmx_combine_out_reverse_u_mask (dest, src, mask, width);
    else
        vmx_combine_out_reverse_u_no_mask (dest, src, width);
}
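/*
 * Scalar reference for the OUT family (illustrative only; not compiled):
 * OUT scales the source by the inverse destination alpha; OUT_REVERSE
 * scales the destination by the inverse source alpha.
 */
#if 0
static uint32_t
scalar_out (uint32_t src, uint32_t dest)
{
    uint32_t dia = ALPHA_8 (~dest);

    UN8x4_MUL_UN8 (src, dia);      /* per channel: src = src * (255 - da) / 255 */
    return src;
}
#endif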
static void
vmx_combine_atop_u_no_mask (uint32_t * dest,
                            const uint32_t *src,
                            int width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
                             vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t dest_a = ALPHA_8 (d);
        uint32_t src_ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);

        dest[i] = s;
    }
}

static void
vmx_combine_atop_u_mask (uint32_t * dest,
                         const uint32_t *src,
                         const uint32_t *mask,
                         int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
                             vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t dest_a = ALPHA_8 (d);
        uint32_t src_ia;

        UN8x4_MUL_UN8 (s, m);

        src_ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);

        dest[i] = s;
    }
}

static void
vmx_combine_atop_u (pixman_implementation_t *imp,
                    pixman_op_t op,
                    uint32_t * dest,
                    const uint32_t * src,
                    const uint32_t * mask,
                    int width)
{
    if (mask)
        vmx_combine_atop_u_mask (dest, src, mask, width);
    else
        vmx_combine_atop_u_no_mask (dest, src, width);
}

static void
vmx_combine_atop_reverse_u_no_mask (uint32_t * dest,
                                    const uint32_t *src,
                                    int width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_add_mul (vdest, splat_alpha (vsrc),
                             vsrc, splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_a = ALPHA_8 (s);
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);

        dest[i] = s;
    }
}
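/*
 * Scalar reference for ATOP (illustrative only; not compiled): the result
 * is src * dest_alpha + dest * (255 - src_alpha), and the REVERSE variant
 * swaps which operand contributes its alpha, mirroring the two operand
 * pairs passed to pix_add_mul above.
 */
#if 0
static uint32_t
scalar_atop (uint32_t src, uint32_t dest)
{
    uint32_t da  = ALPHA_8 (dest);
    uint32_t sia = ALPHA_8 (~src);

    UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (src, da, dest, sia);
    return src;
}
#endif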
static void
vmx_combine_atop_reverse_u_mask (uint32_t * dest,
                                 const uint32_t *src,
                                 const uint32_t *mask,
                                 int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_add_mul (vdest, splat_alpha (vsrc),
                             vsrc, splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_a;
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8 (s, m);

        src_a = ALPHA_8 (s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);

        dest[i] = s;
    }
}

static void
vmx_combine_atop_reverse_u (pixman_implementation_t *imp,
                            pixman_op_t op,
                            uint32_t * dest,
                            const uint32_t * src,
                            const uint32_t * mask,
                            int width)
{
    if (mask)
        vmx_combine_atop_reverse_u_mask (dest, src, mask, width);
    else
        vmx_combine_atop_reverse_u_no_mask (dest, src, width);
}

static void
vmx_combine_xor_u_no_mask (uint32_t * dest,
                           const uint32_t *src,
                           int width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
                             vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_ia = ALPHA_8 (~s);
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);

        dest[i] = s;
    }
}

static void
vmx_combine_xor_u_mask (uint32_t * dest,
                        const uint32_t *src,
                        const uint32_t *mask,
                        int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
                             vdest, splat_alpha (negate (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t src_ia;
        uint32_t dest_ia = ALPHA_8 (~d);

        UN8x4_MUL_UN8 (s, m);

        src_ia = ALPHA_8 (~s);

        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);

        dest[i] = s;
    }
}

static void
vmx_combine_xor_u (pixman_implementation_t *imp,
                   pixman_op_t op,
                   uint32_t * dest,
                   const uint32_t * src,
                   const uint32_t * mask,
                   int width)
{
    if (mask)
        vmx_combine_xor_u_mask (dest, src, mask, width);
    else
        vmx_combine_xor_u_no_mask (dest, src, width);
}

static void
vmx_combine_add_u_no_mask (uint32_t * dest,
                           const uint32_t *src,
                           int width)
{
    int i;
    vector unsigned int vdest, vsrc;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKS (dest, src);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORS (dest, src);

        vdest = pix_add (vsrc, vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t s = src[i];
        uint32_t d = dest[i];

        UN8x4_ADD_UN8x4 (d, s);

        dest[i] = d;
    }
}

static void
vmx_combine_add_u_mask (uint32_t * dest,
                        const uint32_t *src,
                        const uint32_t *mask,
                        int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, src_mask, mask_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSM (dest, src, mask);

        vdest = pix_add (vsrc, vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t m = ALPHA_8 (mask[i]);
        uint32_t s = src[i];
        uint32_t d = dest[i];

        UN8x4_MUL_UN8 (s, m);
        UN8x4_ADD_UN8x4 (d, s);

        dest[i] = d;
    }
}

static void
vmx_combine_add_u (pixman_implementation_t *imp,
                   pixman_op_t op,
                   uint32_t * dest,
                   const uint32_t * src,
                   const uint32_t * mask,
                   int width)
{
    if (mask)
        vmx_combine_add_u_mask (dest, src, mask, width);
    else
        vmx_combine_add_u_no_mask (dest, src, width);
}
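/*
 * The *_ca ("component alpha") combiners below treat the mask as four
 * independent per-channel alphas instead of broadcasting its alpha byte.
 * A scalar sketch of the per-channel multiply they are built on
 * (illustrative only; not compiled):
 */
#if 0
static uint32_t
scalar_mul_un8x4 (uint32_t x, uint32_t a)
{
    UN8x4_MUL_UN8x4 (x, a);        /* each channel: x_c = x_c * a_c / 255 */
    return x;
}
#endif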
static void
vmx_combine_src_ca (pixman_implementation_t *imp,
                    pixman_op_t op,
                    uint32_t * dest,
                    const uint32_t * src,
                    const uint32_t * mask,
                    int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (vsrc, vmask);

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];

        UN8x4_MUL_UN8x4 (s, a);

        dest[i] = s;
    }
}

static void
vmx_combine_over_ca (pixman_implementation_t *imp,
                     pixman_op_t op,
                     uint32_t * dest,
                     const uint32_t * src,
                     const uint32_t * mask,
                     int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest);

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s);

        dest[i] = d;
    }
}

static void
vmx_combine_over_reverse_ca (pixman_implementation_t *imp,
                             pixman_op_t op,
                             uint32_t * dest,
                             const uint32_t * src,
                             const uint32_t * mask,
                             int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask));

        STORE_VECTOR (dest);

        mask += 4;
        src += 4;
        dest += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t ida = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d);

        dest[i] = s;
    }
}

static void
vmx_combine_in_ca (pixman_implementation_t *imp,
                   pixman_op_t op,
                   uint32_t * dest,
                   const uint32_t * src,
                   const uint32_t * mask,
                   int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t da = ALPHA_8 (dest[i]);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (s, da);

        dest[i] = s;
    }
}

static void
vmx_combine_in_reverse_ca (pixman_implementation_t *imp,
                           pixman_op_t op,
                           uint32_t * dest,
                           const uint32_t * src,
                           const uint32_t * mask,
                           int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (src[i]);

        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4 (d, a);

        dest[i] = d;
    }
}

static void
vmx_combine_out_ca (pixman_implementation_t *imp,
                    pixman_op_t op,
                    uint32_t * dest,
                    const uint32_t * src,
                    const uint32_t * mask,
                    int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (
            pix_multiply (vsrc, vmask), splat_alpha (negate (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t da = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (s, da);

        dest[i] = s;
    }
}

static void
vmx_combine_out_reverse_ca (pixman_implementation_t *imp,
                            pixman_op_t op,
                            uint32_t * dest,
                            const uint32_t * src,
                            const uint32_t * mask,
                            int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_multiply (
            vdest, negate (pix_multiply (vmask, splat_alpha (vsrc))));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);

        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4 (d, ~a);

        dest[i] = d;
    }
}

static void
vmx_combine_atop_ca (pixman_implementation_t *imp,
                     pixman_op_t op,
                     uint32_t * dest,
                     const uint32_t * src,
                     const uint32_t * mask,
                     int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask, vsrca;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vsrca = splat_alpha (vsrc);

        vsrc = pix_multiply (vsrc, vmask);
        vmask = pix_multiply (vmask, vsrca);

        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
                             negate (vmask), vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);
        uint32_t da = ALPHA_8 (d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);

        dest[i] = d;
    }
}

static void
vmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
                             pixman_op_t op,
                             uint32_t * dest,
                             const uint32_t * src,
                             const uint32_t * mask,
                             int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_add_mul (vdest,
                             pix_multiply (vmask, splat_alpha (vsrc)),
                             pix_multiply (vsrc, vmask),
                             negate (splat_alpha (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);
        uint32_t da = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da);

        dest[i] = d;
    }
}

static void
vmx_combine_xor_ca (pixman_implementation_t *imp,
                    pixman_op_t op,
                    uint32_t * dest,
                    const uint32_t * src,
                    const uint32_t * mask,
                    int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_add_mul (vdest,
                             negate (pix_multiply (vmask, splat_alpha (vsrc))),
                             pix_multiply (vsrc, vmask),
                             negate (splat_alpha (vdest)));

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];
        uint32_t sa = ALPHA_8 (s);
        uint32_t da = ALPHA_8 (~d);

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_MUL_UN8 (a, sa);
        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);

        dest[i] = d;
    }
}

static void
vmx_combine_add_ca (pixman_implementation_t *imp,
                    pixman_op_t op,
                    uint32_t * dest,
                    const uint32_t * src,
                    const uint32_t * mask,
                    int width)
{
    int i;
    vector unsigned int vdest, vsrc, vmask;
    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
        dest_mask, mask_mask, src_mask, store_mask;

    COMPUTE_SHIFT_MASKC (dest, src, mask);

    /* printf ("%s\n",__PRETTY_FUNCTION__); */
    for (i = width / 4; i > 0; i--)
    {
        LOAD_VECTORSC (dest, src, mask);

        vdest = pix_add (pix_multiply (vsrc, vmask), vdest);

        STORE_VECTOR (dest);

        src += 4;
        dest += 4;
        mask += 4;
    }

    for (i = width % 4; --i >= 0;)
    {
        uint32_t a = mask[i];
        uint32_t s = src[i];
        uint32_t d = dest[i];

        UN8x4_MUL_UN8x4 (s, a);
        UN8x4_ADD_UN8x4 (s, d);

        dest[i] = s;
    }
}
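/*
 * Calling-convention note (an assumption spelled out for clarity, not new
 * behaviour): the combiners above all have pixman's combine_32 function
 * shape and are invoked per scanline, roughly as
 *
 *     imp->combine_32[op] (imp, op, dest, src, mask, width);
 *
 * where mask == NULL selects the _no_mask paths in the _u variants, while
 * the _ca variants are only ever called with a mask.
 */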
static const pixman_fast_path_t vmx_fast_paths[] =
{
    { PIXMAN_OP_NONE },
};

pixman_implementation_t *
_pixman_implementation_create_vmx (pixman_implementation_t *fallback)
{
    pixman_implementation_t *imp = _pixman_implementation_create (fallback, vmx_fast_paths);

    /* Set up function pointers */

    imp->combine_32[PIXMAN_OP_OVER] = vmx_combine_over_u;
    imp->combine_32[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_u;
    imp->combine_32[PIXMAN_OP_IN] = vmx_combine_in_u;
    imp->combine_32[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_u;
    imp->combine_32[PIXMAN_OP_OUT] = vmx_combine_out_u;
    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_u;
    imp->combine_32[PIXMAN_OP_ATOP] = vmx_combine_atop_u;
    imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_u;
    imp->combine_32[PIXMAN_OP_XOR] = vmx_combine_xor_u;

    imp->combine_32[PIXMAN_OP_ADD] = vmx_combine_add_u;

    imp->combine_32_ca[PIXMAN_OP_SRC] = vmx_combine_src_ca;
    imp->combine_32_ca[PIXMAN_OP_OVER] = vmx_combine_over_ca;
    imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_IN] = vmx_combine_in_ca;
    imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = vmx_combine_in_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_OUT] = vmx_combine_out_ca;
    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = vmx_combine_out_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_ATOP] = vmx_combine_atop_ca;
    imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_ca;
    imp->combine_32_ca[PIXMAN_OP_XOR] = vmx_combine_xor_ca;
    imp->combine_32_ca[PIXMAN_OP_ADD] = vmx_combine_add_ca;

    return imp;
}
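/*
 * Usage sketch (illustrative only; the real call site lives in pixman's
 * CPU-detection code, and names such as pixman_have_vmx may differ
 * between pixman versions): the VMX implementation is layered over a
 * fallback implementation, which still handles every operation not
 * overridden above.
 *
 *     imp = _pixman_implementation_create_fast_path (general);
 *     if (pixman_have_vmx ())
 *         imp = _pixman_implementation_create_vmx (imp);
 */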