1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/gfx/cairo/libpixman/src/pixman-arm-neon.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,513 @@ 1.4 +/* 1.5 + * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy 1.6 + * 1.7 + * Permission to use, copy, modify, distribute, and sell this software and its 1.8 + * documentation for any purpose is hereby granted without fee, provided that 1.9 + * the above copyright notice appear in all copies and that both that 1.10 + * copyright notice and this permission notice appear in supporting 1.11 + * documentation, and that the name of ARM Ltd not be used in 1.12 + * advertising or publicity pertaining to distribution of the software without 1.13 + * specific, written prior permission. ARM Ltd makes no 1.14 + * representations about the suitability of this software for any purpose. It 1.15 + * is provided "as is" without express or implied warranty. 1.16 + * 1.17 + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS 1.18 + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 1.19 + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY 1.20 + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 1.21 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN 1.22 + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING 1.23 + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 1.24 + * SOFTWARE. 1.25 + * 1.26 + * Author: Ian Rickards (ian.rickards@arm.com) 1.27 + * Author: Jonathan Morton (jonathan.morton@movial.com) 1.28 + * Author: Markku Vire (markku.vire@movial.com) 1.29 + * 1.30 + */ 1.31 + 1.32 +#ifdef HAVE_CONFIG_H 1.33 +#include <config.h> 1.34 +#endif 1.35 + 1.36 +#include <string.h> 1.37 +#include "pixman-private.h" 1.38 +#include "pixman-arm-common.h" 1.39 + 1.40 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888, 1.41 + uint32_t, 1, uint32_t, 1) 1.42 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888, 1.43 + uint32_t, 1, uint32_t, 1) 1.44 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565, 1.45 + uint16_t, 1, uint16_t, 1) 1.46 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888, 1.47 + uint8_t, 3, uint8_t, 3) 1.48 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565, 1.49 + uint32_t, 1, uint16_t, 1) 1.50 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888, 1.51 + uint16_t, 1, uint32_t, 1) 1.52 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev, 1.53 + uint8_t, 3, uint32_t, 1) 1.54 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev, 1.55 + uint8_t, 3, uint16_t, 1) 1.56 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888, 1.57 + uint32_t, 1, uint32_t, 1) 1.58 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_rpixbuf_8888, 1.59 + uint32_t, 1, uint32_t, 1) 1.60 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8, 1.61 + uint8_t, 1, uint8_t, 1) 1.62 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888, 1.63 + uint32_t, 1, uint32_t, 1) 1.64 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565, 1.65 + uint32_t, 1, uint16_t, 1) 1.66 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888, 1.67 + uint32_t, 1, uint32_t, 1) 1.68 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565, 1.69 + uint8_t, 1, uint16_t, 1) 1.70 +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_8888, 1.71 + uint8_t, 1, uint32_t, 1) 1.72 + 1.73 +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565, 1.74 + uint16_t, 1) 1.75 +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888, 1.76 + uint32_t, 1) 1.77 +PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888, 1.78 + uint32_t, 1) 1.79 +PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8, 1.80 + uint8_t, 1) 1.81 + 1.82 +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565, 1.83 + uint8_t, 1, uint16_t, 1) 1.84 +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888, 1.85 + uint8_t, 1, uint32_t, 1) 1.86 +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca, 1.87 + uint32_t, 1, uint32_t, 1) 1.88 +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_0565_ca, 1.89 + uint32_t, 1, uint16_t, 1) 1.90 +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8, 1.91 + uint8_t, 1, uint8_t, 1) 1.92 +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8, 1.93 + uint8_t, 1, uint8_t, 1) 1.94 +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888, 1.95 + uint8_t, 1, uint32_t, 1) 1.96 +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8888, 1.97 + uint8_t, 1, uint32_t, 1) 1.98 +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8, 1.99 + uint8_t, 1, uint8_t, 1) 1.100 + 1.101 +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888, 1.102 + uint32_t, 1, uint32_t, 1) 1.103 +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565, 1.104 + uint32_t, 1, uint16_t, 1) 1.105 +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565, 1.106 + uint16_t, 1, uint16_t, 1) 1.107 +PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888, 1.108 + uint32_t, 1, uint32_t, 1) 1.109 + 1.110 +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8, 1.111 + uint8_t, 1, uint8_t, 1, uint8_t, 1) 1.112 +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565, 1.113 + uint16_t, 1, uint8_t, 1, uint16_t, 1) 1.114 +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888, 1.115 + uint32_t, 1, uint8_t, 1, uint32_t, 1) 1.116 +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888, 1.117 + uint32_t, 1, uint32_t, 1, uint32_t, 1) 1.118 +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888, 1.119 + uint32_t, 1, uint8_t, 1, uint32_t, 1) 1.120 +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888, 1.121 + uint32_t, 1, uint32_t, 1, uint32_t, 1) 1.122 +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565, 1.123 + uint32_t, 1, uint8_t, 1, uint16_t, 1) 1.124 +PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565, 1.125 + uint16_t, 1, uint8_t, 1, uint16_t, 1) 1.126 + 1.127 +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_8888, OVER, 1.128 + uint32_t, uint32_t) 1.129 +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, OVER, 1.130 + uint32_t, uint16_t) 1.131 +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC, 1.132 + uint32_t, uint16_t) 1.133 +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC, 1.134 + uint16_t, uint32_t) 1.135 + 1.136 +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565, 1.137 + OVER, uint32_t, uint16_t) 1.138 +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565, 1.139 + OVER, uint16_t, uint16_t) 1.140 + 1.141 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC, 1.142 + uint32_t, uint32_t) 1.143 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC, 1.144 + uint32_t, uint16_t) 1.145 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC, 1.146 + uint16_t, uint32_t) 1.147 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC, 1.148 + uint16_t, uint16_t) 1.149 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, OVER, 1.150 + uint32_t, uint32_t) 1.151 +static force_inline void 1.152 +pixman_scaled_bilinear_scanline_8888_8888_SRC ( 1.153 + uint32_t * dst, 1.154 + const uint32_t * mask, 1.155 + const uint32_t * src_top, 1.156 + const uint32_t * src_bottom, 1.157 + int32_t w, 1.158 + int wt, 1.159 + int wb, 1.160 + pixman_fixed_t vx, 1.161 + pixman_fixed_t unit_x, 1.162 + pixman_fixed_t max_vx, 1.163 + pixman_bool_t zero_src) 1.164 +{ 1.165 + pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (dst, src_top, src_bottom, wt, wb, vx, unit_x, w); 1.166 +} 1.167 + 1.168 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, ADD, 1.169 + uint32_t, uint32_t) 1.170 + 1.171 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_8888, SRC, 1.172 + uint32_t, uint32_t) 1.173 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_0565, SRC, 1.174 + uint32_t, uint16_t) 1.175 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_x888, SRC, 1.176 + uint16_t, uint32_t) 1.177 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_0565, SRC, 1.178 + uint16_t, uint16_t) 1.179 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, OVER, 1.180 + uint32_t, uint32_t) 1.181 +PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, ADD, 1.182 + uint32_t, uint32_t) 1.183 + 1.184 +void 1.185 +pixman_composite_src_n_8_asm_neon (int32_t w, 1.186 + int32_t h, 1.187 + uint8_t *dst, 1.188 + int32_t dst_stride, 1.189 + uint8_t src); 1.190 + 1.191 +void 1.192 +pixman_composite_src_n_0565_asm_neon (int32_t w, 1.193 + int32_t h, 1.194 + uint16_t *dst, 1.195 + int32_t dst_stride, 1.196 + uint16_t src); 1.197 + 1.198 +void 1.199 +pixman_composite_src_n_8888_asm_neon (int32_t w, 1.200 + int32_t h, 1.201 + uint32_t *dst, 1.202 + int32_t dst_stride, 1.203 + uint32_t src); 1.204 + 1.205 +static pixman_bool_t 1.206 +arm_neon_fill (pixman_implementation_t *imp, 1.207 + uint32_t * bits, 1.208 + int stride, 1.209 + int bpp, 1.210 + int x, 1.211 + int y, 1.212 + int width, 1.213 + int height, 1.214 + uint32_t _xor) 1.215 +{ 1.216 + /* stride is always multiple of 32bit units in pixman */ 1.217 + uint32_t byte_stride = stride * sizeof(uint32_t); 1.218 + 1.219 + switch (bpp) 1.220 + { 1.221 + case 8: 1.222 + pixman_composite_src_n_8_asm_neon ( 1.223 + width, 1.224 + height, 1.225 + (uint8_t *)(((char *) bits) + y * byte_stride + x), 1.226 + byte_stride, 1.227 + _xor & 0xff); 1.228 + return TRUE; 1.229 + case 16: 1.230 + pixman_composite_src_n_0565_asm_neon ( 1.231 + width, 1.232 + height, 1.233 + (uint16_t *)(((char *) bits) + y * byte_stride + x * 2), 1.234 + byte_stride / 2, 1.235 + _xor & 0xffff); 1.236 + return TRUE; 1.237 + case 32: 1.238 + pixman_composite_src_n_8888_asm_neon ( 1.239 + width, 1.240 + height, 1.241 + (uint32_t *)(((char *) bits) + y * byte_stride + x * 4), 1.242 + byte_stride / 4, 1.243 + _xor); 1.244 + return TRUE; 1.245 + default: 1.246 + return FALSE; 1.247 + } 1.248 +} 1.249 + 1.250 +static pixman_bool_t 1.251 +arm_neon_blt (pixman_implementation_t *imp, 1.252 + uint32_t * src_bits, 1.253 + uint32_t * dst_bits, 1.254 + int src_stride, 1.255 + int dst_stride, 1.256 + int src_bpp, 1.257 + int dst_bpp, 1.258 + int src_x, 1.259 + int src_y, 1.260 + int dest_x, 1.261 + int dest_y, 1.262 + int width, 1.263 + int height) 1.264 +{ 1.265 + if (src_bpp != dst_bpp) 1.266 + return FALSE; 1.267 + 1.268 + switch (src_bpp) 1.269 + { 1.270 + case 16: 1.271 + pixman_composite_src_0565_0565_asm_neon ( 1.272 + width, height, 1.273 + (uint16_t *)(((char *) dst_bits) + 1.274 + dest_y * dst_stride * 4 + dest_x * 2), dst_stride * 2, 1.275 + (uint16_t *)(((char *) src_bits) + 1.276 + src_y * src_stride * 4 + src_x * 2), src_stride * 2); 1.277 + return TRUE; 1.278 + case 32: 1.279 + pixman_composite_src_8888_8888_asm_neon ( 1.280 + width, height, 1.281 + (uint32_t *)(((char *) dst_bits) + 1.282 + dest_y * dst_stride * 4 + dest_x * 4), dst_stride, 1.283 + (uint32_t *)(((char *) src_bits) + 1.284 + src_y * src_stride * 4 + src_x * 4), src_stride); 1.285 + return TRUE; 1.286 + default: 1.287 + return FALSE; 1.288 + } 1.289 +} 1.290 + 1.291 +static inline void op_bilinear_over_8888_0565(uint16_t *dst, const uint32_t *mask, const uint32_t *src, int width) 1.292 +{ 1.293 + pixman_composite_over_8888_0565_asm_neon (width, 1, dst, 0, src, 0); 1.294 +} 1.295 + 1.296 +FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_cover_OVER, 1.297 + pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565, 1.298 + uint32_t, uint32_t, uint16_t, 1.299 + COVER, FLAG_NONE) 1.300 +FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_pad_OVER, 1.301 + pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565, 1.302 + uint32_t, uint32_t, uint16_t, 1.303 + PAD, FLAG_NONE) 1.304 +FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_none_OVER, 1.305 + pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565, 1.306 + uint32_t, uint32_t, uint16_t, 1.307 + NONE, FLAG_NONE) 1.308 +FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_normal_OVER, 1.309 + pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565, 1.310 + uint32_t, uint32_t, uint16_t, 1.311 + NORMAL, FLAG_NONE) 1.312 + 1.313 +static const pixman_fast_path_t arm_neon_fast_paths[] = 1.314 +{ 1.315 + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565), 1.316 + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, neon_composite_src_0565_0565), 1.317 + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565), 1.318 + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, neon_composite_src_8888_0565), 1.319 + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565), 1.320 + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, neon_composite_src_8888_0565), 1.321 + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, a8r8g8b8, neon_composite_src_0565_8888), 1.322 + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, neon_composite_src_0565_8888), 1.323 + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, neon_composite_src_0565_8888), 1.324 + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, neon_composite_src_0565_8888), 1.325 + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888), 1.326 + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, neon_composite_src_8888_8888), 1.327 + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888), 1.328 + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, neon_composite_src_8888_8888), 1.329 + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, neon_composite_src_8888_8888), 1.330 + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, neon_composite_src_8888_8888), 1.331 + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), 1.332 + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), 1.333 + PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, neon_composite_src_0888_0888), 1.334 + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev), 1.335 + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev), 1.336 + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, neon_composite_src_pixbuf_8888), 1.337 + PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, neon_composite_src_rpixbuf_8888), 1.338 + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, neon_composite_src_rpixbuf_8888), 1.339 + PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, neon_composite_src_pixbuf_8888), 1.340 + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, neon_composite_src_n_8_8888), 1.341 + PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, neon_composite_src_n_8_8888), 1.342 + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, neon_composite_src_n_8_8888), 1.343 + PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, neon_composite_src_n_8_8888), 1.344 + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8, neon_composite_src_n_8_8), 1.345 + 1.346 + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8), 1.347 + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565), 1.348 + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, neon_composite_over_n_8_0565), 1.349 + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, neon_composite_over_n_8_8888), 1.350 + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, neon_composite_over_n_8_8888), 1.351 + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, neon_composite_over_n_8_8888), 1.352 + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, neon_composite_over_n_8_8888), 1.353 + PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, neon_composite_over_n_0565), 1.354 + PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, neon_composite_over_n_8888), 1.355 + PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, neon_composite_over_n_8888), 1.356 + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca), 1.357 + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca), 1.358 + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca), 1.359 + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca), 1.360 + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, neon_composite_over_n_8888_0565_ca), 1.361 + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, neon_composite_over_n_8888_0565_ca), 1.362 + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888), 1.363 + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888), 1.364 + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565), 1.365 + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, b5g6r5, neon_composite_over_8888_n_0565), 1.366 + PIXMAN_STD_FAST_PATH (OVER, r5g6b5, solid, r5g6b5, neon_composite_over_0565_n_0565), 1.367 + PIXMAN_STD_FAST_PATH (OVER, b5g6r5, solid, b5g6r5, neon_composite_over_0565_n_0565), 1.368 + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, neon_composite_over_8888_8_8888), 1.369 + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, neon_composite_over_8888_8_8888), 1.370 + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, neon_composite_over_8888_8_8888), 1.371 + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, neon_composite_over_8888_8_8888), 1.372 + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, r5g6b5, neon_composite_over_8888_8_0565), 1.373 + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, b5g6r5, neon_composite_over_8888_8_0565), 1.374 + PIXMAN_STD_FAST_PATH (OVER, r5g6b5, a8, r5g6b5, neon_composite_over_0565_8_0565), 1.375 + PIXMAN_STD_FAST_PATH (OVER, b5g6r5, a8, b5g6r5, neon_composite_over_0565_8_0565), 1.376 + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888), 1.377 + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, neon_composite_over_8888_0565), 1.378 + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, neon_composite_over_8888_0565), 1.379 + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, neon_composite_over_8888_8888), 1.380 + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, neon_composite_over_8888_8888), 1.381 + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, neon_composite_over_8888_8888), 1.382 + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, neon_composite_over_8888_8888), 1.383 + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, a8r8g8b8, neon_composite_src_x888_8888), 1.384 + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), 1.385 + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8), 1.386 + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, neon_composite_add_n_8_8888), 1.387 + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, neon_composite_add_n_8_8888), 1.388 + PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8), 1.389 + PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565), 1.390 + PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565), 1.391 + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8, a8r8g8b8, neon_composite_add_8888_8_8888), 1.392 + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, a8, a8b8g8r8, neon_composite_add_8888_8_8888), 1.393 + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888), 1.394 + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, solid, a8r8g8b8, neon_composite_add_8888_n_8888), 1.395 + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, solid, a8b8g8r8, neon_composite_add_8888_n_8888), 1.396 + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8), 1.397 + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888), 1.398 + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888), 1.399 + PIXMAN_STD_FAST_PATH (IN, solid, null, a8, neon_composite_in_n_8), 1.400 + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888), 1.401 + PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888), 1.402 + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565), 1.403 + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, neon_composite_out_reverse_8_0565), 1.404 + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8r8g8b8, neon_composite_out_reverse_8_8888), 1.405 + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8b8g8r8, neon_composite_out_reverse_8_8888), 1.406 + 1.407 + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888), 1.408 + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888), 1.409 + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888), 1.410 + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888), 1.411 + 1.412 + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565), 1.413 + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565), 1.414 + 1.415 + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565), 1.416 + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), 1.417 + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565), 1.418 + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565), 1.419 + 1.420 + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888), 1.421 + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888), 1.422 + /* Note: NONE repeat is not supported yet */ 1.423 + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), 1.424 + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), 1.425 + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), 1.426 + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), 1.427 + 1.428 + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565), 1.429 + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565), 1.430 + 1.431 + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565), 1.432 + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565), 1.433 + 1.434 + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888), 1.435 + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888), 1.436 + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888), 1.437 + 1.438 + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565), 1.439 + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), 1.440 + 1.441 + SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888), 1.442 + SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565), 1.443 + 1.444 + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888), 1.445 + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888), 1.446 + 1.447 + SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8888), 1.448 + SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8888), 1.449 + 1.450 + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8_8888), 1.451 + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8_8888), 1.452 + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8_8888), 1.453 + 1.454 + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_8_0565), 1.455 + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_8_0565), 1.456 + 1.457 + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8_x888), 1.458 + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_8_0565), 1.459 + 1.460 + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8_8888), 1.461 + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8_8888), 1.462 + 1.463 + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8_8888), 1.464 + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8_8888), 1.465 + 1.466 + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565), 1.467 + 1.468 + { PIXMAN_OP_NONE }, 1.469 +}; 1.470 + 1.471 +#define BIND_COMBINE_U(name) \ 1.472 +void \ 1.473 +pixman_composite_scanline_##name##_mask_asm_neon (int32_t w, \ 1.474 + const uint32_t *dst, \ 1.475 + const uint32_t *src, \ 1.476 + const uint32_t *mask); \ 1.477 + \ 1.478 +void \ 1.479 +pixman_composite_scanline_##name##_asm_neon (int32_t w, \ 1.480 + const uint32_t *dst, \ 1.481 + const uint32_t *src); \ 1.482 + \ 1.483 +static void \ 1.484 +neon_combine_##name##_u (pixman_implementation_t *imp, \ 1.485 + pixman_op_t op, \ 1.486 + uint32_t * dest, \ 1.487 + const uint32_t * src, \ 1.488 + const uint32_t * mask, \ 1.489 + int width) \ 1.490 +{ \ 1.491 + if (mask) \ 1.492 + pixman_composite_scanline_##name##_mask_asm_neon (width, dest, \ 1.493 + src, mask); \ 1.494 + else \ 1.495 + pixman_composite_scanline_##name##_asm_neon (width, dest, src); \ 1.496 +} 1.497 + 1.498 +BIND_COMBINE_U (over) 1.499 +BIND_COMBINE_U (add) 1.500 +BIND_COMBINE_U (out_reverse) 1.501 + 1.502 +pixman_implementation_t * 1.503 +_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback) 1.504 +{ 1.505 + pixman_implementation_t *imp = 1.506 + _pixman_implementation_create (fallback, arm_neon_fast_paths); 1.507 + 1.508 + imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u; 1.509 + imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u; 1.510 + imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u; 1.511 + 1.512 + imp->blt = arm_neon_blt; 1.513 + imp->fill = arm_neon_fill; 1.514 + 1.515 + return imp; 1.516 +}