gfx/cairo/libpixman/src/pixman-arm-neon.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/cairo/libpixman/src/pixman-arm-neon.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,513 @@
     1.4 +/*
     1.5 + * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy
     1.6 + *
     1.7 + * Permission to use, copy, modify, distribute, and sell this software and its
     1.8 + * documentation for any purpose is hereby granted without fee, provided that
     1.9 + * the above copyright notice appear in all copies and that both that
    1.10 + * copyright notice and this permission notice appear in supporting
    1.11 + * documentation, and that the name of ARM Ltd not be used in
    1.12 + * advertising or publicity pertaining to distribution of the software without
    1.13 + * specific, written prior permission.  ARM Ltd makes no
    1.14 + * representations about the suitability of this software for any purpose.  It
    1.15 + * is provided "as is" without express or implied warranty.
    1.16 + *
    1.17 + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
    1.18 + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
    1.19 + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
    1.20 + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    1.21 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
    1.22 + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
    1.23 + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
    1.24 + * SOFTWARE.
    1.25 + *
    1.26 + * Author:  Ian Rickards (ian.rickards@arm.com)
    1.27 + * Author:  Jonathan Morton (jonathan.morton@movial.com)
    1.28 + * Author:  Markku Vire (markku.vire@movial.com)
    1.29 + *
    1.30 + */
    1.31 +
    1.32 +#ifdef HAVE_CONFIG_H
    1.33 +#include <config.h>
    1.34 +#endif
    1.35 +
    1.36 +#include <string.h>
    1.37 +#include "pixman-private.h"
    1.38 +#include "pixman-arm-common.h"
    1.39 +
    /*
     * Bindings for NEON routines that work on a source image and a
     * destination image, with no mask.  Each invocation names the operation
     * and then gives a C type plus a count for the source and for the
     * destination; the 24bpp 0888 formats, for instance, are described as
     * "uint8_t, 3".
     *
     * NOTE(review): the macro is not defined in this file (presumably it
     * comes from pixman-arm-common.h).  It appears to declare
     * pixman_composite_<name>_asm_neon and to define the
     * neon_composite_<name> wrapper that arm_neon_fast_paths[] refers to --
     * confirm against the macro definition.
     */
    1.40 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888,
    1.41 +                                   uint32_t, 1, uint32_t, 1)
    1.42 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888,
    1.43 +                                   uint32_t, 1, uint32_t, 1)
    1.44 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565,
    1.45 +                                   uint16_t, 1, uint16_t, 1)
    1.46 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888,
    1.47 +                                   uint8_t, 3, uint8_t, 3)
    1.48 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565,
    1.49 +                                   uint32_t, 1, uint16_t, 1)
    1.50 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888,
    1.51 +                                   uint16_t, 1, uint32_t, 1)
    1.52 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev,
    1.53 +                                   uint8_t, 3, uint32_t, 1)
    1.54 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev,
    1.55 +                                   uint8_t, 3, uint16_t, 1)
    1.56 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888,
    1.57 +                                   uint32_t, 1, uint32_t, 1)
    1.58 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_rpixbuf_8888,
    1.59 +                                   uint32_t, 1, uint32_t, 1)
    1.60 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8,
    1.61 +                                   uint8_t, 1, uint8_t, 1)
    1.62 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888,
    1.63 +                                   uint32_t, 1, uint32_t, 1)
    1.64 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565,
    1.65 +                                   uint32_t, 1, uint16_t, 1)
    1.66 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
    1.67 +                                   uint32_t, 1, uint32_t, 1)
    1.68 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565,
    1.69 +                                   uint8_t, 1, uint16_t, 1)
    1.70 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_8888,
    1.71 +                                   uint8_t, 1, uint32_t, 1)
    1.72 +
    /*
     * Bindings for operations named <op>_n_<dst>: only a destination type
     * and count are given.  The first argument is a flag word; three of the
     * four bindings pass SKIP_ZERO_SRC and in_n_8 passes 0.
     *
     * NOTE(review): "n" presumably denotes a solid source colour, and
     * SKIP_ZERO_SRC presumably lets the generated wrapper return early when
     * that colour is zero -- confirm against the macro definition, which is
     * not part of this file.
     */
    1.73 +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565,
    1.74 +                                 uint16_t, 1)
    1.75 +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888,
    1.76 +                                 uint32_t, 1)
    1.77 +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888,
    1.78 +                                 uint32_t, 1)
    1.79 +PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8,
    1.80 +                                 uint8_t, 1)
    1.81 +
    /*
     * Bindings for operations named <op>_n_<mask>_<dst>.  The two type/count
     * pairs follow the order of the name: mask first, destination second
     * (e.g. over_n_8_0565 is given uint8_t then uint16_t).  The OVER and ADD
     * variants pass SKIP_ZERO_SRC; the two SRC variants pass 0.
     *
     * NOTE(review): the flag semantics live in the macro definition, which
     * is not visible here -- presumably a SRC operation must still write the
     * destination when the solid colour is zero; confirm.
     */
    1.82 +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565,
    1.83 +                                      uint8_t, 1, uint16_t, 1)
    1.84 +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888,
    1.85 +                                      uint8_t, 1, uint32_t, 1)
    1.86 +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca,
    1.87 +                                      uint32_t, 1, uint32_t, 1)
    1.88 +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_0565_ca,
    1.89 +				      uint32_t, 1, uint16_t, 1)
    1.90 +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8,
    1.91 +                                      uint8_t, 1, uint8_t, 1)
    1.92 +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8,
    1.93 +                                      uint8_t, 1, uint8_t, 1)
    1.94 +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888,
    1.95 +                                      uint8_t, 1, uint32_t, 1)
    1.96 +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8888,
    1.97 +                                      uint8_t, 1, uint32_t, 1)
    1.98 +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8,
    1.99 +                                      uint8_t, 1, uint8_t, 1)
   1.100 +
   /*
    * Bindings for operations named <op>_<src>_n_<dst>.  The type/count pairs
    * are given for the source and the destination; every binding here passes
    * SKIP_ZERO_MASK as its flag.
    *
    * NOTE(review): "n" in the mask position presumably denotes a solid mask,
    * and SKIP_ZERO_MASK presumably skips the operation when that mask is
    * zero -- confirm against the macro definition.
    */
   1.101 +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888,
   1.102 +                                     uint32_t, 1, uint32_t, 1)
   1.103 +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565,
   1.104 +                                     uint32_t, 1, uint16_t, 1)
   1.105 +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565,
   1.106 +                                     uint16_t, 1, uint16_t, 1)
   1.107 +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888,
   1.108 +                                     uint32_t, 1, uint32_t, 1)
   1.109 +
   /*
    * Bindings for operations named <op>_<src>_<mask>_<dst>.  Three type/count
    * pairs are given, in the same order as the name: source, mask,
    * destination.  No flag argument is passed to this macro.
    */
   1.110 +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
   1.111 +                                        uint8_t, 1, uint8_t, 1, uint8_t, 1)
   1.112 +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565,
   1.113 +                                        uint16_t, 1, uint8_t, 1, uint16_t, 1)
   1.114 +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888,
   1.115 +                                        uint32_t, 1, uint8_t, 1, uint32_t, 1)
   1.116 +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
   1.117 +                                        uint32_t, 1, uint32_t, 1, uint32_t, 1)
   1.118 +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
   1.119 +                                        uint32_t, 1, uint8_t, 1, uint32_t, 1)
   1.120 +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888,
   1.121 +                                        uint32_t, 1, uint32_t, 1, uint32_t, 1)
   1.122 +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565,
   1.123 +                                        uint32_t, 1, uint8_t, 1, uint16_t, 1)
   1.124 +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565,
   1.125 +                                        uint16_t, 1, uint8_t, 1, uint16_t, 1)
   1.126 +
   /*
    * Bindings for the scaled "nearest" routines.  Each invocation gives the
    * format pair, the operator (OVER or SRC) and the source and destination
    * pixel types.  The results are referenced from arm_neon_fast_paths[]
    * under the names neon_8888_8888, neon_8888_0565 and neon_0565_8888.
    */
   1.127 +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_8888, OVER,
   1.128 +                                        uint32_t, uint32_t)
   1.129 +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, OVER,
   1.130 +                                        uint32_t, uint16_t)
   1.131 +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC,
   1.132 +                                        uint32_t, uint16_t)
   1.133 +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC,
   1.134 +                                        uint16_t, uint32_t)
   1.135 +
   /*
    * Same, for the variants named <src>_8_<dst>; the table below registers
    * them through PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH.
    */
   1.136 +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565,
   1.137 +                                           OVER, uint32_t, uint16_t)
   1.138 +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565,
   1.139 +                                           OVER, uint16_t, uint16_t)
   1.140 +
   /*
    * Bindings for the scaled bilinear routines without a mask.  The SRC
    * variants pass 0 as the flag, the OVER variant passes SKIP_ZERO_SRC.
    *
    * NOTE(review): the 8888_8888 SRC binding is presumably what declares
    * pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, which the
    * wrapper function that follows calls -- confirm against the macro
    * definition.
    */
   1.141 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC,
   1.142 +                                         uint32_t, uint32_t)
   1.143 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
   1.144 +                                         uint32_t, uint16_t)
   1.145 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC,
   1.146 +                                         uint16_t, uint32_t)
   1.147 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC,
   1.148 +                                         uint16_t, uint16_t)
   1.149 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, OVER,
   1.150 +                                         uint32_t, uint32_t)
   1.150 +                                         uint32_t, uint32_t)
   /*
    * Adapter around pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon
    * with the longer scanline-function signature used by the
    * FAST_BILINEAR_MAINLOOP_COMMON instantiations later in this file.
    *
    * dst                 - output scanline
    * src_top, src_bottom - the two source rows being blended
    * w                   - number of pixels to produce
    * wt, wb              - weights for the top and bottom rows
    * vx, unit_x          - starting x position and per-pixel step
    *                       (pixman_fixed_t)
    *
    * mask, max_vx and zero_src are accepted but ignored.  Note that the
    * assembly routine takes the width as its last argument, not after dst.
    */
   1.151 +static force_inline void
   1.152 +pixman_scaled_bilinear_scanline_8888_8888_SRC (
   1.153 +                                                uint32_t *       dst,
   1.154 +                                                const uint32_t * mask,
   1.155 +                                                const uint32_t * src_top,
   1.156 +                                                const uint32_t * src_bottom,
   1.157 +                                                int32_t          w,
   1.158 +                                                int              wt,
   1.159 +                                                int              wb,
   1.160 +                                                pixman_fixed_t   vx,
   1.161 +                                                pixman_fixed_t   unit_x,
   1.162 +                                                pixman_fixed_t   max_vx,
   1.163 +                                                pixman_bool_t    zero_src)
   1.164 +{
   1.165 +    pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (dst, src_top, src_bottom, wt, wb, vx, unit_x, w);
   1.166 +}
   1.167 +
   /* Bilinear 8888 -> 8888 with the ADD operator (flag: SKIP_ZERO_SRC). */
   1.168 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, ADD,
   1.169 +                                         uint32_t, uint32_t)
   1.170 +
   /*
    * Bilinear bindings for the variants named <src>_8_<dst>.  As above, the
    * SRC variants pass 0 and the OVER/ADD variants pass SKIP_ZERO_SRC.  The
    * table below registers them via SIMPLE_BILINEAR_A8_MASK_FAST_PATH.
    */
   1.171 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_8888, SRC,
   1.172 +                                            uint32_t, uint32_t)
   1.173 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_0565, SRC,
   1.174 +                                            uint32_t, uint16_t)
   1.175 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_x888, SRC,
   1.176 +                                            uint16_t, uint32_t)
   1.177 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_0565, SRC,
   1.178 +                                            uint16_t, uint16_t)
   1.179 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, OVER,
   1.180 +                                            uint32_t, uint32_t)
   1.181 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, ADD,
   1.182 +                                            uint32_t, uint32_t)
   1.183 +
   /*
    * Prototypes for the solid-fill routines called by arm_neon_fill(); no
    * definition appears in this file (presumably they are implemented in
    * assembly, as the _asm_neon suffix suggests).
    *
    * w, h       - size of the rectangle in pixels
    * dst        - address of the first pixel to write
    * dst_stride - distance between rows, counted in destination elements
    *              (arm_neon_fill passes the byte stride divided by the
    *              pixel size)
    * src        - the value to store in every pixel
    */
   1.184 +void
   1.185 +pixman_composite_src_n_8_asm_neon (int32_t   w,
   1.186 +                                   int32_t   h,
   1.187 +                                   uint8_t  *dst,
   1.188 +                                   int32_t   dst_stride,
   1.189 +                                   uint8_t   src);
   1.190 +
   1.191 +void
   1.192 +pixman_composite_src_n_0565_asm_neon (int32_t   w,
   1.193 +                                      int32_t   h,
   1.194 +                                      uint16_t *dst,
   1.195 +                                      int32_t   dst_stride,
   1.196 +                                      uint16_t  src);
   1.197 +
   1.198 +void
   1.199 +pixman_composite_src_n_8888_asm_neon (int32_t   w,
   1.200 +                                      int32_t   h,
   1.201 +                                      uint32_t *dst,
   1.202 +                                      int32_t   dst_stride,
   1.203 +                                      uint32_t  src);
   1.204 +
   /*
    * Fill a rectangle of a bitmap with a constant value using the NEON
    * solid-fill routines.
    *
    * imp           - implementation handle (unused here)
    * bits          - start of the bitmap
    * stride        - distance between rows of the bitmap, in uint32_t units
    * bpp           - bits per pixel; 8, 16 and 32 are handled
    * x, y          - top-left corner of the rectangle, in pixels
    * width, height - size of the rectangle, in pixels
    * _xor          - fill value; only the low 8 or 16 bits are used for the
    *                 8bpp and 16bpp cases
    *
    * Returns TRUE when the fill was performed and FALSE for any other bpp,
    * in which case nothing is written.
    */
   1.205 +static pixman_bool_t
   1.206 +arm_neon_fill (pixman_implementation_t *imp,
   1.207 +               uint32_t *               bits,
   1.208 +               int                      stride,
   1.209 +               int                      bpp,
   1.210 +               int                      x,
   1.211 +               int                      y,
   1.212 +               int                      width,
   1.213 +               int                      height,
   1.214 +	       uint32_t                 _xor)
   1.215 +{
   1.216 +    /* stride is always multiple of 32bit units in pixman */
   /*
    * Keep the byte stride signed.  With an unsigned type a negative stride
    * wraps to a large positive value, so byte_stride / 2 and byte_stride / 4
    * no longer equal the signed per-element stride expected by the int32_t
    * dst_stride parameter, and y * byte_stride becomes an unsigned offset.
    * The signed form matches the arithmetic used by arm_neon_blt() and gives
    * the same results as before for non-negative strides.
    */
   1.217 +    int32_t byte_stride = stride * (int32_t) sizeof (uint32_t);
   1.218 +
   1.219 +    switch (bpp)
   1.220 +    {
   1.221 +    case 8:
   1.222 +	pixman_composite_src_n_8_asm_neon (
   1.223 +		width,
   1.224 +		height,
   1.225 +		(uint8_t *)(((char *) bits) + y * byte_stride + x),
   1.226 +		byte_stride,
   1.227 +		_xor & 0xff);
   1.228 +	return TRUE;
   1.229 +    case 16:
   /* The routine takes its stride in 16-bit elements, hence the / 2. */
   1.230 +	pixman_composite_src_n_0565_asm_neon (
   1.231 +		width,
   1.232 +		height,
   1.233 +		(uint16_t *)(((char *) bits) + y * byte_stride + x * 2),
   1.234 +		byte_stride / 2,
   1.235 +		_xor & 0xffff);
   1.236 +	return TRUE;
   1.237 +    case 32:
   /* The routine takes its stride in 32-bit elements, hence the / 4. */
   1.238 +	pixman_composite_src_n_8888_asm_neon (
   1.239 +		width,
   1.240 +		height,
   1.241 +		(uint32_t *)(((char *) bits) + y * byte_stride + x * 4),
   1.242 +		byte_stride / 4,
   1.243 +		_xor);
   1.244 +	return TRUE;
   1.245 +    default:
   1.246 +	return FALSE;
   1.247 +    }
   1.248 +}
   1.249 +
   /*
    * Copy a width x height rectangle from one bitmap to another using the
    * NEON SRC routines.
    *
    * imp                    - implementation handle (unused here)
    * src_bits, dst_bits     - start of the source / destination bitmap
    * src_stride, dst_stride - row distance of each bitmap in uint32_t units
    * src_bpp, dst_bpp       - bits per pixel of each bitmap
    * src_x, src_y           - top-left corner of the rectangle in the source
    * dest_x, dest_y         - top-left corner in the destination
    * width, height          - size of the rectangle, in pixels
    *
    * Returns FALSE without copying when the two depths differ or the depth
    * is neither 16 nor 32; returns TRUE after copying otherwise.
    */
   1.250 +static pixman_bool_t
   1.251 +arm_neon_blt (pixman_implementation_t *imp,
   1.252 +              uint32_t *               src_bits,
   1.253 +              uint32_t *               dst_bits,
   1.254 +              int                      src_stride,
   1.255 +              int                      dst_stride,
   1.256 +              int                      src_bpp,
   1.257 +              int                      dst_bpp,
   1.258 +              int                      src_x,
   1.259 +              int                      src_y,
   1.260 +              int                      dest_x,
   1.261 +              int                      dest_y,
   1.262 +              int                      width,
   1.263 +              int                      height)
   1.264 +{
   1.265 +    if (src_bpp != dst_bpp)
   1.266 +	return FALSE;
   1.267 +
   1.268 +    switch (src_bpp)
   1.269 +    {
   1.270 +    case 16:
   /*
    * Row offsets are computed in bytes (stride * 4); the stride handed to
    * the routine is converted to 16-bit elements (stride * 2).
    */
   1.271 +	pixman_composite_src_0565_0565_asm_neon (
   1.272 +		width, height,
   1.273 +		(uint16_t *)(((char *) dst_bits) +
   1.274 +		dest_y * dst_stride * 4 + dest_x * 2), dst_stride * 2,
   1.275 +		(uint16_t *)(((char *) src_bits) +
   1.276 +		src_y * src_stride * 4 + src_x * 2), src_stride * 2);
   1.277 +	return TRUE;
   1.278 +    case 32:
   /* Strides are already in 32-bit elements and are passed unchanged. */
   1.279 +	pixman_composite_src_8888_8888_asm_neon (
   1.280 +		width, height,
   1.281 +		(uint32_t *)(((char *) dst_bits) +
   1.282 +		dest_y * dst_stride * 4 + dest_x * 4), dst_stride,
   1.283 +		(uint32_t *)(((char *) src_bits) +
   1.284 +		src_y * src_stride * 4 + src_x * 4), src_stride);
   1.285 +	return TRUE;
   1.286 +    default:
   1.287 +	return FALSE;
   1.288 +    }
   1.289 +}
   1.290 +
   /*
    * Composite a single row of `width` 32-bit pixels from src onto the
    * 16-bit row at dst with pixman_composite_over_8888_0565_asm_neon.  The
    * routine is called with a height of 1 and both strides set to 0.  The
    * mask argument is ignored.  Used as the per-scanline operation in the
    * FAST_BILINEAR_MAINLOOP_COMMON instantiations that follow.
    */
   1.291 +static inline void op_bilinear_over_8888_0565(uint16_t *dst, const uint32_t *mask, const uint32_t *src, int width)
   1.292 +{
   1.293 +    pixman_composite_over_8888_0565_asm_neon (width, 1, dst, 0, src, 0);
   1.294 +}
   1.295 +
   /*
    * Four instantiations, one per repeat mode (COVER, PAD, NONE, NORMAL), of
    * a bilinear main loop named neon_8888_0565_<repeat>_OVER.  Each is given
    * the SRC bilinear scanline wrapper, op_bilinear_over_8888_0565, and the
    * types uint32_t, uint32_t and uint16_t.
    *
    * NOTE(review): FAST_BILINEAR_MAINLOOP_COMMON is not defined in this
    * file.  It presumably scales each row into a temporary 32-bit buffer
    * with the scanline function and then applies the op function to write
    * the 16-bit destination, and the three types are presumably source,
    * mask and destination -- confirm against the macro definition.
    */
   1.296 +FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_cover_OVER,
   1.297 +			       pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
   1.298 +			       uint32_t, uint32_t, uint16_t,
   1.299 +			       COVER, FLAG_NONE)
   1.300 +FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_pad_OVER,
   1.301 +			       pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
   1.302 +			       uint32_t, uint32_t, uint16_t,
   1.303 +			       PAD, FLAG_NONE)
   1.304 +FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_none_OVER,
   1.305 +			       pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
   1.306 +			       uint32_t, uint32_t, uint16_t,
   1.307 +			       NONE, FLAG_NONE)
   1.308 +FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_normal_OVER,
   1.309 +			       pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
   1.310 +			       uint32_t, uint32_t, uint16_t,
   1.311 +			       NORMAL, FLAG_NONE)
   1.312 +
   /*
    * Table of NEON fast paths, handed to _pixman_implementation_create() by
    * _pixman_implementation_create_arm_neon().  Each PIXMAN_STD_FAST_PATH
    * row lists the operator, the source, mask and destination formats, and
    * the routine that handles that combination.  Several routines are listed
    * for more than one format combination (for example src_0565_0565 serves
    * both r5g6b5 and b5g6r5).
    *
    * NOTE(review): lookup presumably stops at the first matching row, so the
    * order of entries may be significant -- confirm before reordering.
    */
   1.313 +static const pixman_fast_path_t arm_neon_fast_paths[] =
   1.314 +{
   1.315 +    PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     r5g6b5,   neon_composite_src_0565_0565),
   1.316 +    PIXMAN_STD_FAST_PATH (SRC,  b5g6r5,   null,     b5g6r5,   neon_composite_src_0565_0565),
   1.317 +    PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     r5g6b5,   neon_composite_src_8888_0565),
   1.318 +    PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     r5g6b5,   neon_composite_src_8888_0565),
   1.319 +    PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     b5g6r5,   neon_composite_src_8888_0565),
   1.320 +    PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     b5g6r5,   neon_composite_src_8888_0565),
   1.321 +    PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     a8r8g8b8, neon_composite_src_0565_8888),
   1.322 +    PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     x8r8g8b8, neon_composite_src_0565_8888),
   1.323 +    PIXMAN_STD_FAST_PATH (SRC,  b5g6r5,   null,     a8b8g8r8, neon_composite_src_0565_8888),
   1.324 +    PIXMAN_STD_FAST_PATH (SRC,  b5g6r5,   null,     x8b8g8r8, neon_composite_src_0565_8888),
   1.325 +    PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     x8r8g8b8, neon_composite_src_8888_8888),
   1.326 +    PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     x8r8g8b8, neon_composite_src_8888_8888),
   1.327 +    PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     x8b8g8r8, neon_composite_src_8888_8888),
   1.328 +    PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     x8b8g8r8, neon_composite_src_8888_8888),
   1.329 +    PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     a8r8g8b8, neon_composite_src_8888_8888),
   1.330 +    PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     a8b8g8r8, neon_composite_src_8888_8888),
   1.331 +    PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     a8r8g8b8, neon_composite_src_x888_8888),
   1.332 +    PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     a8b8g8r8, neon_composite_src_x888_8888),
   1.333 +    PIXMAN_STD_FAST_PATH (SRC,  r8g8b8,   null,     r8g8b8,   neon_composite_src_0888_0888),
   1.334 +    PIXMAN_STD_FAST_PATH (SRC,  b8g8r8,   null,     x8r8g8b8, neon_composite_src_0888_8888_rev),
   1.335 +    PIXMAN_STD_FAST_PATH (SRC,  b8g8r8,   null,     r5g6b5,   neon_composite_src_0888_0565_rev),
   /*
    * The pixbuf and rpixbuf routines swap roles when the destination is
    * a8b8g8r8 instead of a8r8g8b8.
    */
   1.336 +    PIXMAN_STD_FAST_PATH (SRC,  pixbuf,   pixbuf,   a8r8g8b8, neon_composite_src_pixbuf_8888),
   1.337 +    PIXMAN_STD_FAST_PATH (SRC,  pixbuf,   pixbuf,   a8b8g8r8, neon_composite_src_rpixbuf_8888),
   1.338 +    PIXMAN_STD_FAST_PATH (SRC,  rpixbuf,  rpixbuf,  a8r8g8b8, neon_composite_src_rpixbuf_8888),
   1.339 +    PIXMAN_STD_FAST_PATH (SRC,  rpixbuf,  rpixbuf,  a8b8g8r8, neon_composite_src_pixbuf_8888),
   1.340 +    PIXMAN_STD_FAST_PATH (SRC,  solid,    a8,       a8r8g8b8, neon_composite_src_n_8_8888),
   1.341 +    PIXMAN_STD_FAST_PATH (SRC,  solid,    a8,       x8r8g8b8, neon_composite_src_n_8_8888),
   1.342 +    PIXMAN_STD_FAST_PATH (SRC,  solid,    a8,       a8b8g8r8, neon_composite_src_n_8_8888),
   1.343 +    PIXMAN_STD_FAST_PATH (SRC,  solid,    a8,       x8b8g8r8, neon_composite_src_n_8_8888),
   1.344 +    PIXMAN_STD_FAST_PATH (SRC,  solid,    a8,       a8,       neon_composite_src_n_8_8),
   1.345 +
   1.346 +    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8,       neon_composite_over_n_8_8),
   1.347 +    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       r5g6b5,   neon_composite_over_n_8_0565),
   1.348 +    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       b5g6r5,   neon_composite_over_n_8_0565),
   1.349 +    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8r8g8b8, neon_composite_over_n_8_8888),
   1.350 +    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8r8g8b8, neon_composite_over_n_8_8888),
   1.351 +    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8b8g8r8, neon_composite_over_n_8_8888),
   1.352 +    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8b8g8r8, neon_composite_over_n_8_8888),
   1.353 +    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     r5g6b5,   neon_composite_over_n_0565),
   1.354 +    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     a8r8g8b8, neon_composite_over_n_8888),
   1.355 +    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     x8r8g8b8, neon_composite_over_n_8888),
   1.356 +    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca),
   1.357 +    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca),
   1.358 +    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca),
   1.359 +    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca),
   1.360 +    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5,   neon_composite_over_n_8888_0565_ca),
   1.361 +    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5,   neon_composite_over_n_8888_0565_ca),
   1.362 +    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    a8r8g8b8, neon_composite_over_8888_n_8888),
   1.363 +    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    x8r8g8b8, neon_composite_over_8888_n_8888),
   1.364 +    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    r5g6b5,   neon_composite_over_8888_n_0565),
   1.365 +    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid,    b5g6r5,   neon_composite_over_8888_n_0565),
   1.366 +    PIXMAN_STD_FAST_PATH (OVER, r5g6b5,   solid,    r5g6b5,   neon_composite_over_0565_n_0565),
   1.367 +    PIXMAN_STD_FAST_PATH (OVER, b5g6r5,   solid,    b5g6r5,   neon_composite_over_0565_n_0565),
   1.368 +    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       a8r8g8b8, neon_composite_over_8888_8_8888),
   1.369 +    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       x8r8g8b8, neon_composite_over_8888_8_8888),
   1.370 +    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       a8b8g8r8, neon_composite_over_8888_8_8888),
   1.371 +    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       x8b8g8r8, neon_composite_over_8888_8_8888),
   1.372 +    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       r5g6b5,   neon_composite_over_8888_8_0565),
   1.373 +    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       b5g6r5,   neon_composite_over_8888_8_0565),
   1.374 +    PIXMAN_STD_FAST_PATH (OVER, r5g6b5,   a8,       r5g6b5,   neon_composite_over_0565_8_0565),
   1.375 +    PIXMAN_STD_FAST_PATH (OVER, b5g6r5,   a8,       b5g6r5,   neon_composite_over_0565_8_0565),
   1.376 +    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888),
   1.377 +    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     r5g6b5,   neon_composite_over_8888_0565),
   1.378 +    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     b5g6r5,   neon_composite_over_8888_0565),
   1.379 +    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     a8r8g8b8, neon_composite_over_8888_8888),
   1.380 +    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     x8r8g8b8, neon_composite_over_8888_8888),
   1.381 +    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     a8b8g8r8, neon_composite_over_8888_8888),
   1.382 +    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     x8b8g8r8, neon_composite_over_8888_8888),
   /*
    * OVER from an x8 source is routed to the SRC x888 routine (presumably
    * because such a source has no alpha channel, making OVER equal to SRC).
    */
   1.383 +    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null,     a8r8g8b8, neon_composite_src_x888_8888),
   1.384 +    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null,     a8b8g8r8, neon_composite_src_x888_8888),
   1.385 +    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8,       neon_composite_add_n_8_8),
   1.386 +    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8r8g8b8, neon_composite_add_n_8_8888),
   1.387 +    PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8b8g8r8, neon_composite_add_n_8_8888),
   1.388 +    PIXMAN_STD_FAST_PATH (ADD,  a8,       a8,       a8,       neon_composite_add_8_8_8),
   1.389 +    PIXMAN_STD_FAST_PATH (ADD,  r5g6b5,   a8,       r5g6b5,   neon_composite_add_0565_8_0565),
   1.390 +    PIXMAN_STD_FAST_PATH (ADD,  b5g6r5,   a8,       b5g6r5,   neon_composite_add_0565_8_0565),
   1.391 +    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8,       a8r8g8b8, neon_composite_add_8888_8_8888),
   1.392 +    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, a8,       a8b8g8r8, neon_composite_add_8888_8_8888),
   1.393 +    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
   1.394 +    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, solid,    a8r8g8b8, neon_composite_add_8888_n_8888),
   1.395 +    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, solid,    a8b8g8r8, neon_composite_add_8888_n_8888),
   1.396 +    PIXMAN_STD_FAST_PATH (ADD,  a8,       null,     a8,       neon_composite_add_8_8),
   1.397 +    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, null,     a8r8g8b8, neon_composite_add_8888_8888),
   1.398 +    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, null,     a8b8g8r8, neon_composite_add_8888_8888),
   1.399 +    PIXMAN_STD_FAST_PATH (IN,   solid,    null,     a8,       neon_composite_in_n_8),
   1.400 +    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888),
   1.401 +    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
   1.402 +    PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, r5g6b5,   neon_composite_out_reverse_8_0565),
   1.403 +    PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, b5g6r5,   neon_composite_out_reverse_8_0565),
   1.404 +    PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, a8r8g8b8, neon_composite_out_reverse_8_8888),
   1.405 +    PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, a8b8g8r8, neon_composite_out_reverse_8_8888),
   1.406 +
   /* Scaled paths using the "nearest" bindings declared earlier. */
   1.407 +    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
   1.408 +    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888),
   1.409 +    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
   1.410 +    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888),
   1.411 +
   1.412 +    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
   1.413 +    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565),
   1.414 +
   1.415 +    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
   1.416 +    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
   1.417 +    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565),
   1.418 +    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565),
   1.419 +
   1.420 +    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888),
   1.421 +    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888),
   1.422 +    /* Note: NONE repeat is not supported yet */
   1.423 +    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
   1.424 +    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
   1.425 +    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
   1.426 +    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
   1.427 +
   1.428 +    PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565),
   1.429 +    PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565),
   1.430 +
   1.431 +    PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565),
   1.432 +    PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565),
   1.433 +
   /* Scaled paths using the bilinear bindings declared earlier. */
   1.434 +    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888),
   1.435 +    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888),
   1.436 +    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888),
   1.437 +
   1.438 +    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
   1.439 +    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
   1.440 +
   1.441 +    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888),
   1.442 +    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565),
   1.443 +
   1.444 +    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
   1.445 +    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
   1.446 +
   1.447 +    SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8888),
   1.448 +    SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8888),
   1.449 +
   1.450 +    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
   1.451 +    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
   1.452 +    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8_8888),
   1.453 +
   1.454 +    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_8_0565),
   1.455 +    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_8_0565),
   1.456 +
   1.457 +    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8_x888),
   1.458 +    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_8_0565),
   1.459 +
   1.460 +    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
   1.461 +    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
   1.462 +
   1.463 +    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
   1.464 +    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
   1.465 +
   /*
    * NOTE(review): this entry presumably resolves to the
    * neon_8888_0565_<repeat>_OVER loops instantiated with
    * FAST_BILINEAR_MAINLOOP_COMMON earlier in this file -- confirm against
    * the SIMPLE_BILINEAR_FAST_PATH definition.
    */
   1.466 +    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
   1.467 +
   /* Final entry with operator PIXMAN_OP_NONE (presumably the terminator). */
   1.468 +    { PIXMAN_OP_NONE },
   1.469 +};
   1.470 +
   /*
    * BIND_COMBINE_U (name) declares two external scanline routines,
    * pixman_composite_scanline_<name>_mask_asm_neon and
    * pixman_composite_scanline_<name>_asm_neon, and defines a static
    * combiner neon_combine_<name>_u with the (imp, op, dest, src, mask,
    * width) signature.  The combiner calls the masked routine when mask is
    * non-NULL and the unmasked routine otherwise; imp and op are not used.
    *
    * NOTE(review): the prototypes declare dst as const uint32_t * although
    * the combiner passes its non-const dest buffer there; presumably the
    * routines write their result through that pointer -- confirm, and
    * consider dropping the const.
    */
   1.471 +#define BIND_COMBINE_U(name)                                             \
   1.472 +void                                                                     \
   1.473 +pixman_composite_scanline_##name##_mask_asm_neon (int32_t         w,     \
   1.474 +                                                  const uint32_t *dst,   \
   1.475 +                                                  const uint32_t *src,   \
   1.476 +                                                  const uint32_t *mask); \
   1.477 +                                                                         \
   1.478 +void                                                                     \
   1.479 +pixman_composite_scanline_##name##_asm_neon (int32_t         w,          \
   1.480 +                                             const uint32_t *dst,        \
   1.481 +                                             const uint32_t *src);       \
   1.482 +                                                                         \
   1.483 +static void                                                              \
   1.484 +neon_combine_##name##_u (pixman_implementation_t *imp,                   \
   1.485 +                         pixman_op_t              op,                    \
   1.486 +                         uint32_t *               dest,                  \
   1.487 +                         const uint32_t *         src,                   \
   1.488 +                         const uint32_t *         mask,                  \
   1.489 +                         int                      width)                 \
   1.490 +{                                                                        \
   1.491 +    if (mask)                                                            \
   1.492 +	pixman_composite_scanline_##name##_mask_asm_neon (width, dest,   \
   1.493 +	                                                  src, mask);    \
   1.494 +    else                                                                 \
   1.495 +	pixman_composite_scanline_##name##_asm_neon (width, dest, src);  \
   1.496 +}
   1.497 +
   /*
    * Instantiate the combiners installed by
    * _pixman_implementation_create_arm_neon(): neon_combine_over_u,
    * neon_combine_add_u and neon_combine_out_reverse_u.
    */
   1.498 +BIND_COMBINE_U (over)
   1.499 +BIND_COMBINE_U (add)
   1.500 +BIND_COMBINE_U (out_reverse)
   1.501 +
   /*
    * Create the ARM NEON implementation.
    *
    * Builds a pixman_implementation_t from `fallback` and the
    * arm_neon_fast_paths table, then installs the NEON combiners for the
    * OVER, ADD and OUT_REVERSE operators in combine_32 and sets the blt and
    * fill hooks to arm_neon_blt and arm_neon_fill.
    *
    * Returns the new implementation.  The result of
    * _pixman_implementation_create() is used without a NULL check.
    */
   1.502 +pixman_implementation_t *
   1.503 +_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback)
   1.504 +{
   1.505 +    pixman_implementation_t *imp =
   1.506 +	_pixman_implementation_create (fallback, arm_neon_fast_paths);
   1.507 +
   1.508 +    imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
   1.509 +    imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
   1.510 +    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u;
   1.511 +
   1.512 +    imp->blt = arm_neon_blt;
   1.513 +    imp->fill = arm_neon_fill;
   1.514 +
   1.515 +    return imp;
   1.516 +}

mercurial