gfx/cairo/libpixman/src/pixman-arm-neon.c

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /*
     2  * Copyright © 2009 ARM Ltd, Movial Creative Technologies Oy
     3  *
     4  * Permission to use, copy, modify, distribute, and sell this software and its
     5  * documentation for any purpose is hereby granted without fee, provided that
     6  * the above copyright notice appear in all copies and that both that
     7  * copyright notice and this permission notice appear in supporting
     8  * documentation, and that the name of ARM Ltd not be used in
     9  * advertising or publicity pertaining to distribution of the software without
    10  * specific, written prior permission.  ARM Ltd makes no
    11  * representations about the suitability of this software for any purpose.  It
    12  * is provided "as is" without express or implied warranty.
    13  *
    14  * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
    15  * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
    16  * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
    17  * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    18  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
    19  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
    20  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
    21  * SOFTWARE.
    22  *
    23  * Author:  Ian Rickards (ian.rickards@arm.com)
    24  * Author:  Jonathan Morton (jonathan.morton@movial.com)
    25  * Author:  Markku Vire (markku.vire@movial.com)
    26  *
    27  */
    29 #ifdef HAVE_CONFIG_H
    30 #include <config.h>
    31 #endif
    33 #include <string.h>
    34 #include "pixman-private.h"
    35 #include "pixman-arm-common.h"
    /*
     * Bindings for the NEON compositing routines referenced further down in
     * this file.  Each macro invocation names one routine and lists, for every
     * image involved, a storage type followed by a number (e.g. "uint8_t, 3"
     * for the 0888 formats).
     * NOTE(review): the binding macros are not defined in this file --
     * presumably they come from pixman-arm-common.h, and the number is
     * presumably the per-pixel element count; confirm against that header.
     */
    37 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888,
    38                                    uint32_t, 1, uint32_t, 1)
    39 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888,
    40                                    uint32_t, 1, uint32_t, 1)
    41 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565,
    42                                    uint16_t, 1, uint16_t, 1)
    43 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888,
    44                                    uint8_t, 3, uint8_t, 3)
    45 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565,
    46                                    uint32_t, 1, uint16_t, 1)
    47 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888,
    48                                    uint16_t, 1, uint32_t, 1)
    49 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev,
    50                                    uint8_t, 3, uint32_t, 1)
    51 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev,
    52                                    uint8_t, 3, uint16_t, 1)
    53 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888,
    54                                    uint32_t, 1, uint32_t, 1)
    55 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_rpixbuf_8888,
    56                                    uint32_t, 1, uint32_t, 1)
    57 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8_8,
    58                                    uint8_t, 1, uint8_t, 1)
    59 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888,
    60                                    uint32_t, 1, uint32_t, 1)
    61 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565,
    62                                    uint32_t, 1, uint16_t, 1)
    63 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
    64                                    uint32_t, 1, uint32_t, 1)
    65 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565,
    66                                    uint8_t, 1, uint16_t, 1)
    67 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_8888,
    68                                    uint8_t, 1, uint32_t, 1)
       /* "n" routines: the fast path table pairs these with a solid source and
        * no mask.  The first argument is a flag (SKIP_ZERO_SRC or 0). */
    70 PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565,
    71                                  uint16_t, 1)
    72 PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888,
    73                                  uint32_t, 1)
    74 PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888,
    75                                  uint32_t, 1)
    76 PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8,
    77                                  uint8_t, 1)
       /* Solid source combined with a mask image. */
    79 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_0565,
    80                                       uint8_t, 1, uint16_t, 1)
    81 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888,
    82                                       uint8_t, 1, uint32_t, 1)
    83 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca,
    84                                       uint32_t, 1, uint32_t, 1)
    85 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_0565_ca,
    86 				      uint32_t, 1, uint16_t, 1)
    87 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8,
    88                                       uint8_t, 1, uint8_t, 1)
    89 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8,
    90                                       uint8_t, 1, uint8_t, 1)
    91 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888,
    92                                       uint8_t, 1, uint32_t, 1)
    93 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8888,
    94                                       uint8_t, 1, uint32_t, 1)
    95 PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8,
    96                                       uint8_t, 1, uint8_t, 1)
       /* Source image combined with a solid mask. */
    98 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888,
    99                                      uint32_t, 1, uint32_t, 1)
   100 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565,
   101                                      uint32_t, 1, uint16_t, 1)
   102 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565,
   103                                      uint16_t, 1, uint16_t, 1)
   104 PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888,
   105                                      uint32_t, 1, uint32_t, 1)
       /* Source image, mask image and destination. */
   107 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
   108                                         uint8_t, 1, uint8_t, 1, uint8_t, 1)
   109 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565,
   110                                         uint16_t, 1, uint8_t, 1, uint16_t, 1)
   111 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8_8888,
   112                                         uint32_t, 1, uint8_t, 1, uint32_t, 1)
   113 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
   114                                         uint32_t, 1, uint32_t, 1, uint32_t, 1)
   115 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
   116                                         uint32_t, 1, uint8_t, 1, uint32_t, 1)
   117 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888,
   118                                         uint32_t, 1, uint32_t, 1, uint32_t, 1)
   119 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565,
   120                                         uint32_t, 1, uint8_t, 1, uint16_t, 1)
   121 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565,
   122                                         uint16_t, 1, uint8_t, 1, uint16_t, 1)
       /* Nearest-scaled bindings; the operator (OVER/SRC) is passed explicitly. */
   124 PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_8888, OVER,
   125                                         uint32_t, uint32_t)
   126 PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, OVER,
   127                                         uint32_t, uint16_t)
   128 PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC,
   129                                         uint32_t, uint16_t)
   130 PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC,
   131                                         uint16_t, uint32_t)
       /* Nearest-scaled bindings that additionally take an a8 mask. */
   133 PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565,
   134                                            OVER, uint32_t, uint16_t)
   135 PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565,
   136                                            OVER, uint16_t, uint16_t)
       /* Bilinear-scaled bindings without a mask. */
   138 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC,
   139                                          uint32_t, uint32_t)
   140 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
   141                                          uint32_t, uint16_t)
   142 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC,
   143                                          uint16_t, uint32_t)
   144 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC,
   145                                          uint16_t, uint16_t)
   146 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, OVER,
   147                                          uint32_t, uint32_t)
   148 static force_inline void
   149 pixman_scaled_bilinear_scanline_8888_8888_SRC (
   150                                                 uint32_t *       dst,
   151                                                 const uint32_t * mask,
   152                                                 const uint32_t * src_top,
   153                                                 const uint32_t * src_bottom,
   154                                                 int32_t          w,
   155                                                 int              wt,
   156                                                 int              wb,
   157                                                 pixman_fixed_t   vx,
   158                                                 pixman_fixed_t   unit_x,
   159                                                 pixman_fixed_t   max_vx,
   160                                                 pixman_bool_t    zero_src)
   161 {
   162     pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (dst, src_top, src_bottom, wt, wb, vx, unit_x, w);
   163 }
   /*
    * Remaining bilinear-scaled bindings: the maskless ADD variant, followed by
    * the variants that take an a8 mask (SRC for several format pairs, then
    * OVER and ADD for 8888 -> 8888).  The first macro argument is a flag
    * (0 or SKIP_ZERO_SRC).
    * NOTE(review): the binding macros are defined outside this file; confirm
    * their exact expansion there.
    */
   165 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, neon, 8888_8888, ADD,
   166                                          uint32_t, uint32_t)
   168 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_8888, SRC,
   169                                             uint32_t, uint32_t)
   170 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 8888_8_0565, SRC,
   171                                             uint32_t, uint16_t)
   172 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_x888, SRC,
   173                                             uint16_t, uint32_t)
   174 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (0, neon, 0565_8_0565, SRC,
   175                                             uint16_t, uint16_t)
   176 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, OVER,
   177                                             uint32_t, uint32_t)
   178 PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_8888, ADD,
   179                                             uint32_t, uint32_t)
   /*
    * Prototypes of the solid-fill routines for 8, 16 and 32 bits per pixel.
    * They are only declared here; the definitions live outside this file.
    *
    * arm_neon_fill() calls them with the rectangle width and height, a pointer
    * to the first pixel to write, the destination stride expressed in units of
    * the destination pixel type, and the fill value.
    */
   181 void
   182 pixman_composite_src_n_8_asm_neon (int32_t   w,
   183                                    int32_t   h,
   184                                    uint8_t  *dst,
   185                                    int32_t   dst_stride,
   186                                    uint8_t   src);
   188 void
   189 pixman_composite_src_n_0565_asm_neon (int32_t   w,
   190                                       int32_t   h,
   191                                       uint16_t *dst,
   192                                       int32_t   dst_stride,
   193                                       uint16_t  src);
   195 void
   196 pixman_composite_src_n_8888_asm_neon (int32_t   w,
   197                                       int32_t   h,
   198                                       uint32_t *dst,
   199                                       int32_t   dst_stride,
   200                                       uint32_t  src);
   202 static pixman_bool_t
   203 arm_neon_fill (pixman_implementation_t *imp,
   204                uint32_t *               bits,
   205                int                      stride,
   206                int                      bpp,
   207                int                      x,
   208                int                      y,
   209                int                      width,
   210                int                      height,
   211 	       uint32_t                 _xor)
   212 {
   213     /* stride is always multiple of 32bit units in pixman */
   214     uint32_t byte_stride = stride * sizeof(uint32_t);
   216     switch (bpp)
   217     {
   218     case 8:
   219 	pixman_composite_src_n_8_asm_neon (
   220 		width,
   221 		height,
   222 		(uint8_t *)(((char *) bits) + y * byte_stride + x),
   223 		byte_stride,
   224 		_xor & 0xff);
   225 	return TRUE;
   226     case 16:
   227 	pixman_composite_src_n_0565_asm_neon (
   228 		width,
   229 		height,
   230 		(uint16_t *)(((char *) bits) + y * byte_stride + x * 2),
   231 		byte_stride / 2,
   232 		_xor & 0xffff);
   233 	return TRUE;
   234     case 32:
   235 	pixman_composite_src_n_8888_asm_neon (
   236 		width,
   237 		height,
   238 		(uint32_t *)(((char *) bits) + y * byte_stride + x * 4),
   239 		byte_stride / 4,
   240 		_xor);
   241 	return TRUE;
   242     default:
   243 	return FALSE;
   244     }
   245 }
   247 static pixman_bool_t
   248 arm_neon_blt (pixman_implementation_t *imp,
   249               uint32_t *               src_bits,
   250               uint32_t *               dst_bits,
   251               int                      src_stride,
   252               int                      dst_stride,
   253               int                      src_bpp,
   254               int                      dst_bpp,
   255               int                      src_x,
   256               int                      src_y,
   257               int                      dest_x,
   258               int                      dest_y,
   259               int                      width,
   260               int                      height)
   261 {
   262     if (src_bpp != dst_bpp)
   263 	return FALSE;
   265     switch (src_bpp)
   266     {
   267     case 16:
   268 	pixman_composite_src_0565_0565_asm_neon (
   269 		width, height,
   270 		(uint16_t *)(((char *) dst_bits) +
   271 		dest_y * dst_stride * 4 + dest_x * 2), dst_stride * 2,
   272 		(uint16_t *)(((char *) src_bits) +
   273 		src_y * src_stride * 4 + src_x * 2), src_stride * 2);
   274 	return TRUE;
   275     case 32:
   276 	pixman_composite_src_8888_8888_asm_neon (
   277 		width, height,
   278 		(uint32_t *)(((char *) dst_bits) +
   279 		dest_y * dst_stride * 4 + dest_x * 4), dst_stride,
   280 		(uint32_t *)(((char *) src_bits) +
   281 		src_y * src_stride * 4 + src_x * 4), src_stride);
   282 	return TRUE;
   283     default:
   284 	return FALSE;
   285     }
   286 }
   288 static inline void op_bilinear_over_8888_0565(uint16_t *dst, const uint32_t *mask, const uint32_t *src, int width)
   289 {
   290     pixman_composite_over_8888_0565_asm_neon (width, 1, dst, 0, src, 0);
   291 }
   /*
    * Instantiate four bilinear main loops for OVER 8888 -> 0565, one each for
    * COVER, PAD, NONE and NORMAL.  Every instance is given
    * pixman_scaled_bilinear_scanline_8888_8888_SRC and
    * op_bilinear_over_8888_0565 as its two scanline functions, with
    * uint32_t, uint32_t and uint16_t as the type arguments.
    * NOTE(review): FAST_BILINEAR_MAINLOOP_COMMON is defined outside this file;
    * presumably the first function produces an intermediate 32-bit scanline
    * that the second one composites onto the 16-bit destination -- confirm.
    */
   293 FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_cover_OVER,
   294 			       pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
   295 			       uint32_t, uint32_t, uint16_t,
   296 			       COVER, FLAG_NONE)
   297 FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_pad_OVER,
   298 			       pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
   299 			       uint32_t, uint32_t, uint16_t,
   300 			       PAD, FLAG_NONE)
   301 FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_none_OVER,
   302 			       pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
   303 			       uint32_t, uint32_t, uint16_t,
   304 			       NONE, FLAG_NONE)
   305 FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_0565_normal_OVER,
   306 			       pixman_scaled_bilinear_scanline_8888_8888_SRC, op_bilinear_over_8888_0565,
   307 			       uint32_t, uint32_t, uint16_t,
   308 			       NORMAL, FLAG_NONE)
   /*
    * Fast path table handed to _pixman_implementation_create() by
    * _pixman_implementation_create_arm_neon().
    *
    * PIXMAN_STD_FAST_PATH entries appear to read: operator, source format,
    * mask format ("null" when there is none), destination format, and the
    * function to use.  The scaled entries name only the operator, source
    * format, destination format and a routine suffix.
    * NOTE(review): the entry macros are defined outside this file; confirm the
    * argument meaning there.
    */
   310 static const pixman_fast_path_t arm_neon_fast_paths[] =
   311 {
           /* SRC operator, unscaled. */
   312     PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     r5g6b5,   neon_composite_src_0565_0565),
   313     PIXMAN_STD_FAST_PATH (SRC,  b5g6r5,   null,     b5g6r5,   neon_composite_src_0565_0565),
   314     PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     r5g6b5,   neon_composite_src_8888_0565),
   315     PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     r5g6b5,   neon_composite_src_8888_0565),
   316     PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     b5g6r5,   neon_composite_src_8888_0565),
   317     PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     b5g6r5,   neon_composite_src_8888_0565),
   318     PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     a8r8g8b8, neon_composite_src_0565_8888),
   319     PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     x8r8g8b8, neon_composite_src_0565_8888),
   320     PIXMAN_STD_FAST_PATH (SRC,  b5g6r5,   null,     a8b8g8r8, neon_composite_src_0565_8888),
   321     PIXMAN_STD_FAST_PATH (SRC,  b5g6r5,   null,     x8b8g8r8, neon_composite_src_0565_8888),
   322     PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     x8r8g8b8, neon_composite_src_8888_8888),
   323     PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     x8r8g8b8, neon_composite_src_8888_8888),
   324     PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     x8b8g8r8, neon_composite_src_8888_8888),
   325     PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     x8b8g8r8, neon_composite_src_8888_8888),
   326     PIXMAN_STD_FAST_PATH (SRC,  a8r8g8b8, null,     a8r8g8b8, neon_composite_src_8888_8888),
   327     PIXMAN_STD_FAST_PATH (SRC,  a8b8g8r8, null,     a8b8g8r8, neon_composite_src_8888_8888),
   328     PIXMAN_STD_FAST_PATH (SRC,  x8r8g8b8, null,     a8r8g8b8, neon_composite_src_x888_8888),
   329     PIXMAN_STD_FAST_PATH (SRC,  x8b8g8r8, null,     a8b8g8r8, neon_composite_src_x888_8888),
   330     PIXMAN_STD_FAST_PATH (SRC,  r8g8b8,   null,     r8g8b8,   neon_composite_src_0888_0888),
   331     PIXMAN_STD_FAST_PATH (SRC,  b8g8r8,   null,     x8r8g8b8, neon_composite_src_0888_8888_rev),
   332     PIXMAN_STD_FAST_PATH (SRC,  b8g8r8,   null,     r5g6b5,   neon_composite_src_0888_0565_rev),
           /* pixbuf/rpixbuf: the routine chosen swaps with the destination
            * channel order (a8r8g8b8 vs a8b8g8r8). */
   333     PIXMAN_STD_FAST_PATH (SRC,  pixbuf,   pixbuf,   a8r8g8b8, neon_composite_src_pixbuf_8888),
   334     PIXMAN_STD_FAST_PATH (SRC,  pixbuf,   pixbuf,   a8b8g8r8, neon_composite_src_rpixbuf_8888),
   335     PIXMAN_STD_FAST_PATH (SRC,  rpixbuf,  rpixbuf,  a8r8g8b8, neon_composite_src_rpixbuf_8888),
   336     PIXMAN_STD_FAST_PATH (SRC,  rpixbuf,  rpixbuf,  a8b8g8r8, neon_composite_src_pixbuf_8888),
   337     PIXMAN_STD_FAST_PATH (SRC,  solid,    a8,       a8r8g8b8, neon_composite_src_n_8_8888),
   338     PIXMAN_STD_FAST_PATH (SRC,  solid,    a8,       x8r8g8b8, neon_composite_src_n_8_8888),
   339     PIXMAN_STD_FAST_PATH (SRC,  solid,    a8,       a8b8g8r8, neon_composite_src_n_8_8888),
   340     PIXMAN_STD_FAST_PATH (SRC,  solid,    a8,       x8b8g8r8, neon_composite_src_n_8_8888),
   341     PIXMAN_STD_FAST_PATH (SRC,  solid,    a8,       a8,       neon_composite_src_n_8_8),
           /* OVER operator, unscaled. */
   343     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8,       neon_composite_over_n_8_8),
   344     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       r5g6b5,   neon_composite_over_n_8_0565),
   345     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       b5g6r5,   neon_composite_over_n_8_0565),
   346     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8r8g8b8, neon_composite_over_n_8_8888),
   347     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8r8g8b8, neon_composite_over_n_8_8888),
   348     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8b8g8r8, neon_composite_over_n_8_8888),
   349     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8b8g8r8, neon_composite_over_n_8_8888),
   350     PIXMAN_STD_FAST_PATH (OVER, solid,    null,     r5g6b5,   neon_composite_over_n_0565),
   351     PIXMAN_STD_FAST_PATH (OVER, solid,    null,     a8r8g8b8, neon_composite_over_n_8888),
   352     PIXMAN_STD_FAST_PATH (OVER, solid,    null,     x8r8g8b8, neon_composite_over_n_8888),
   353     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, neon_composite_over_n_8888_8888_ca),
   354     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca),
   355     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca),
   356     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca),
   357     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5,   neon_composite_over_n_8888_0565_ca),
   358     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5,   neon_composite_over_n_8888_0565_ca),
   359     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    a8r8g8b8, neon_composite_over_8888_n_8888),
   360     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    x8r8g8b8, neon_composite_over_8888_n_8888),
   361     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    r5g6b5,   neon_composite_over_8888_n_0565),
   362     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid,    b5g6r5,   neon_composite_over_8888_n_0565),
   363     PIXMAN_STD_FAST_PATH (OVER, r5g6b5,   solid,    r5g6b5,   neon_composite_over_0565_n_0565),
   364     PIXMAN_STD_FAST_PATH (OVER, b5g6r5,   solid,    b5g6r5,   neon_composite_over_0565_n_0565),
   365     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       a8r8g8b8, neon_composite_over_8888_8_8888),
   366     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       x8r8g8b8, neon_composite_over_8888_8_8888),
   367     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       a8b8g8r8, neon_composite_over_8888_8_8888),
   368     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       x8b8g8r8, neon_composite_over_8888_8_8888),
   369     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       r5g6b5,   neon_composite_over_8888_8_0565),
   370     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       b5g6r5,   neon_composite_over_8888_8_0565),
   371     PIXMAN_STD_FAST_PATH (OVER, r5g6b5,   a8,       r5g6b5,   neon_composite_over_0565_8_0565),
   372     PIXMAN_STD_FAST_PATH (OVER, b5g6r5,   a8,       b5g6r5,   neon_composite_over_0565_8_0565),
   373     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888),
   374     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     r5g6b5,   neon_composite_over_8888_0565),
   375     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     b5g6r5,   neon_composite_over_8888_0565),
   376     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     a8r8g8b8, neon_composite_over_8888_8888),
   377     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     x8r8g8b8, neon_composite_over_8888_8888),
   378     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     a8b8g8r8, neon_composite_over_8888_8888),
   379     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     x8b8g8r8, neon_composite_over_8888_8888),
           /* OVER from an x888 source is served by the SRC x888 -> 8888 routine. */
   380     PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null,     a8r8g8b8, neon_composite_src_x888_8888),
   381     PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null,     a8b8g8r8, neon_composite_src_x888_8888),
           /* ADD operator, unscaled. */
   382     PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8,       neon_composite_add_n_8_8),
   383     PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8r8g8b8, neon_composite_add_n_8_8888),
   384     PIXMAN_STD_FAST_PATH (ADD,  solid,    a8,       a8b8g8r8, neon_composite_add_n_8_8888),
   385     PIXMAN_STD_FAST_PATH (ADD,  a8,       a8,       a8,       neon_composite_add_8_8_8),
   386     PIXMAN_STD_FAST_PATH (ADD,  r5g6b5,   a8,       r5g6b5,   neon_composite_add_0565_8_0565),
   387     PIXMAN_STD_FAST_PATH (ADD,  b5g6r5,   a8,       b5g6r5,   neon_composite_add_0565_8_0565),
   388     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8,       a8r8g8b8, neon_composite_add_8888_8_8888),
   389     PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, a8,       a8b8g8r8, neon_composite_add_8888_8_8888),
   390     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
   391     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, solid,    a8r8g8b8, neon_composite_add_8888_n_8888),
   392     PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, solid,    a8b8g8r8, neon_composite_add_8888_n_8888),
   393     PIXMAN_STD_FAST_PATH (ADD,  a8,       null,     a8,       neon_composite_add_8_8),
   394     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, null,     a8r8g8b8, neon_composite_add_8888_8888),
   395     PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, null,     a8b8g8r8, neon_composite_add_8888_8888),
           /* IN, OVER_REVERSE and OUT_REVERSE operators, unscaled. */
   396     PIXMAN_STD_FAST_PATH (IN,   solid,    null,     a8,       neon_composite_in_n_8),
   397     PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888),
   398     PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
   399     PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, r5g6b5,   neon_composite_out_reverse_8_0565),
   400     PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, b5g6r5,   neon_composite_out_reverse_8_0565),
   401     PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, a8r8g8b8, neon_composite_out_reverse_8_8888),
   402     PIXMAN_STD_FAST_PATH (OUT_REVERSE,  a8,    null, a8b8g8r8, neon_composite_out_reverse_8_8888),
           /* Nearest-scaled paths without a mask. */
   404     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
   405     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888),
   406     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
   407     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888),
   409     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
   410     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565),
   412     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
   413     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
   414     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, b5g6r5, neon_8888_0565),
   415     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, b5g6r5, neon_8888_0565),
   417     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, neon_0565_8888),
   418     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8888),
   419     /* Note: NONE repeat is not supported yet */
   420     SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
   421     SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
   422     SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
   423     SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
           /* Nearest-scaled paths with an a8 mask. */
   425     PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565),
   426     PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565),
   428     PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565),
   429     PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565),
           /* Bilinear-scaled paths without a mask. */
   431     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888),
   432     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888),
   433     SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888),
   435     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
   436     SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
   438     SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888),
   439     SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565),
   441     SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888),
   442     SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888),
   444     SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8888),
   445     SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8888),
           /* Bilinear-scaled paths with an a8 mask. */
   447     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
   448     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
   449     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8_8888),
   451     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_8_0565),
   452     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_8_0565),
   454     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_8_x888),
   455     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_8_0565),
   457     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
   458     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
   460     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, neon_8888_8_8888),
   461     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, neon_8888_8_8888),
           /* Bilinear OVER 8888 -> 0565; presumably resolves to the
            * neon_8888_0565_*_OVER main loops instantiated with
            * FAST_BILINEAR_MAINLOOP_COMMON in this file -- confirm. */
   463     SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565),
           /* Last entry; presumably the end-of-table marker -- confirm. */
   465     { PIXMAN_OP_NONE },
   466 };
   468 #define BIND_COMBINE_U(name)                                             \
   469 void                                                                     \
   470 pixman_composite_scanline_##name##_mask_asm_neon (int32_t         w,     \
   471                                                   const uint32_t *dst,   \
   472                                                   const uint32_t *src,   \
   473                                                   const uint32_t *mask); \
   474                                                                          \
   475 void                                                                     \
   476 pixman_composite_scanline_##name##_asm_neon (int32_t         w,          \
   477                                              const uint32_t *dst,        \
   478                                              const uint32_t *src);       \
   479                                                                          \
   480 static void                                                              \
   481 neon_combine_##name##_u (pixman_implementation_t *imp,                   \
   482                          pixman_op_t              op,                    \
   483                          uint32_t *               dest,                  \
   484                          const uint32_t *         src,                   \
   485                          const uint32_t *         mask,                  \
   486                          int                      width)                 \
   487 {                                                                        \
   488     if (mask)                                                            \
   489 	pixman_composite_scanline_##name##_mask_asm_neon (width, dest,   \
   490 	                                                  src, mask);    \
   491     else                                                                 \
   492 	pixman_composite_scanline_##name##_asm_neon (width, dest, src);  \
   493 }
   495 BIND_COMBINE_U (over)
   496 BIND_COMBINE_U (add)
   497 BIND_COMBINE_U (out_reverse)
   499 pixman_implementation_t *
   500 _pixman_implementation_create_arm_neon (pixman_implementation_t *fallback)
   501 {
   502     pixman_implementation_t *imp =
   503 	_pixman_implementation_create (fallback, arm_neon_fast_paths);
   505     imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
   506     imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
   507     imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u;
   509     imp->blt = arm_neon_blt;
   510     imp->fill = arm_neon_fill;
   512     return imp;
   513 }

mercurial