1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/gfx/cairo/libpixman/src/pixman-fast-path.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,2590 @@ 1.4 +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ 1.5 +/* 1.6 + * Copyright © 2000 SuSE, Inc. 1.7 + * Copyright © 2007 Red Hat, Inc. 1.8 + * 1.9 + * Permission to use, copy, modify, distribute, and sell this software and its 1.10 + * documentation for any purpose is hereby granted without fee, provided that 1.11 + * the above copyright notice appear in all copies and that both that 1.12 + * copyright notice and this permission notice appear in supporting 1.13 + * documentation, and that the name of SuSE not be used in advertising or 1.14 + * publicity pertaining to distribution of the software without specific, 1.15 + * written prior permission. SuSE makes no representations about the 1.16 + * suitability of this software for any purpose. It is provided "as is" 1.17 + * without express or implied warranty. 1.18 + * 1.19 + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL 1.20 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE 1.21 + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 1.22 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION 1.23 + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 1.24 + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 1.25 + * 1.26 + * Author: Keith Packard, SuSE, Inc. 1.27 + */ 1.28 + 1.29 +#ifdef HAVE_CONFIG_H 1.30 +#include <config.h> 1.31 +#endif 1.32 +#include <string.h> 1.33 +#include <stdlib.h> 1.34 +#include "pixman-private.h" 1.35 +#include "pixman-combine32.h" 1.36 +#include "pixman-inlines.h" 1.37 + 1.38 +static force_inline uint32_t 1.39 +fetch_24 (uint8_t *a) 1.40 +{ 1.41 + if (((uintptr_t)a) & 1) 1.42 + { 1.43 +#ifdef WORDS_BIGENDIAN 1.44 + return (*a << 16) | (*(uint16_t *)(a + 1)); 1.45 +#else 1.46 + return *a | (*(uint16_t *)(a + 1) << 8); 1.47 +#endif 1.48 + } 1.49 + else 1.50 + { 1.51 +#ifdef WORDS_BIGENDIAN 1.52 + return (*(uint16_t *)a << 8) | *(a + 2); 1.53 +#else 1.54 + return *(uint16_t *)a | (*(a + 2) << 16); 1.55 +#endif 1.56 + } 1.57 +} 1.58 + 1.59 +static force_inline void 1.60 +store_24 (uint8_t *a, 1.61 + uint32_t v) 1.62 +{ 1.63 + if (((uintptr_t)a) & 1) 1.64 + { 1.65 +#ifdef WORDS_BIGENDIAN 1.66 + *a = (uint8_t) (v >> 16); 1.67 + *(uint16_t *)(a + 1) = (uint16_t) (v); 1.68 +#else 1.69 + *a = (uint8_t) (v); 1.70 + *(uint16_t *)(a + 1) = (uint16_t) (v >> 8); 1.71 +#endif 1.72 + } 1.73 + else 1.74 + { 1.75 +#ifdef WORDS_BIGENDIAN 1.76 + *(uint16_t *)a = (uint16_t)(v >> 8); 1.77 + *(a + 2) = (uint8_t)v; 1.78 +#else 1.79 + *(uint16_t *)a = (uint16_t)v; 1.80 + *(a + 2) = (uint8_t)(v >> 16); 1.81 +#endif 1.82 + } 1.83 +} 1.84 + 1.85 +static force_inline uint32_t 1.86 +over (uint32_t src, 1.87 + uint32_t dest) 1.88 +{ 1.89 + uint32_t a = ~src >> 24; 1.90 + 1.91 + UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src); 1.92 + 1.93 + return dest; 1.94 +} 1.95 + 1.96 +static force_inline uint32_t 1.97 +in (uint32_t x, 1.98 + uint8_t y) 1.99 +{ 1.100 + uint16_t a = y; 1.101 + 1.102 + UN8x4_MUL_UN8 (x, a); 1.103 + 1.104 + return x; 1.105 +} 1.106 + 1.107 +/* 1.108 + * Naming convention: 1.109 + * 1.110 + * op_src_mask_dest 1.111 + */ 1.112 +static void 1.113 +fast_composite_over_x888_8_8888 (pixman_implementation_t *imp, 1.114 + pixman_composite_info_t *info) 1.115 +{ 1.116 + PIXMAN_COMPOSITE_ARGS (info); 1.117 + uint32_t *src, 
*src_line; 1.118 + uint32_t *dst, *dst_line; 1.119 + uint8_t *mask, *mask_line; 1.120 + int src_stride, mask_stride, dst_stride; 1.121 + uint8_t m; 1.122 + uint32_t s, d; 1.123 + int32_t w; 1.124 + 1.125 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); 1.126 + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); 1.127 + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); 1.128 + 1.129 + while (height--) 1.130 + { 1.131 + src = src_line; 1.132 + src_line += src_stride; 1.133 + dst = dst_line; 1.134 + dst_line += dst_stride; 1.135 + mask = mask_line; 1.136 + mask_line += mask_stride; 1.137 + 1.138 + w = width; 1.139 + while (w--) 1.140 + { 1.141 + m = *mask++; 1.142 + if (m) 1.143 + { 1.144 + s = *src | 0xff000000; 1.145 + 1.146 + if (m == 0xff) 1.147 + { 1.148 + *dst = s; 1.149 + } 1.150 + else 1.151 + { 1.152 + d = in (s, m); 1.153 + *dst = over (d, *dst); 1.154 + } 1.155 + } 1.156 + src++; 1.157 + dst++; 1.158 + } 1.159 + } 1.160 +} 1.161 + 1.162 +static void 1.163 +fast_composite_in_n_8_8 (pixman_implementation_t *imp, 1.164 + pixman_composite_info_t *info) 1.165 +{ 1.166 + PIXMAN_COMPOSITE_ARGS (info); 1.167 + uint32_t src, srca; 1.168 + uint8_t *dst_line, *dst; 1.169 + uint8_t *mask_line, *mask, m; 1.170 + int dst_stride, mask_stride; 1.171 + int32_t w; 1.172 + uint16_t t; 1.173 + 1.174 + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); 1.175 + 1.176 + srca = src >> 24; 1.177 + 1.178 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); 1.179 + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); 1.180 + 1.181 + if (srca == 0xff) 1.182 + { 1.183 + while (height--) 1.184 + { 1.185 + dst = dst_line; 1.186 + dst_line += dst_stride; 1.187 + mask = mask_line; 1.188 + mask_line += mask_stride; 1.189 + w = width; 1.190 + 1.191 + while (w--) 1.192 + { 1.193 + m = *mask++; 1.194 + 1.195 + if (m == 0) 1.196 + *dst = 0; 1.197 + else if (m != 0xff) 1.198 + *dst = MUL_UN8 (m, *dst, t); 1.199 + 1.200 + dst++; 1.201 + } 1.202 + } 1.203 + } 1.204 + else 1.205 + { 1.206 + while (height--) 1.207 + { 1.208 + dst = dst_line; 1.209 + dst_line += dst_stride; 1.210 + mask = mask_line; 1.211 + mask_line += mask_stride; 1.212 + w = width; 1.213 + 1.214 + while (w--) 1.215 + { 1.216 + m = *mask++; 1.217 + m = MUL_UN8 (m, srca, t); 1.218 + 1.219 + if (m == 0) 1.220 + *dst = 0; 1.221 + else if (m != 0xff) 1.222 + *dst = MUL_UN8 (m, *dst, t); 1.223 + 1.224 + dst++; 1.225 + } 1.226 + } 1.227 + } 1.228 +} 1.229 + 1.230 +static void 1.231 +fast_composite_in_8_8 (pixman_implementation_t *imp, 1.232 + pixman_composite_info_t *info) 1.233 +{ 1.234 + PIXMAN_COMPOSITE_ARGS (info); 1.235 + uint8_t *dst_line, *dst; 1.236 + uint8_t *src_line, *src; 1.237 + int dst_stride, src_stride; 1.238 + int32_t w; 1.239 + uint8_t s; 1.240 + uint16_t t; 1.241 + 1.242 + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); 1.243 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); 1.244 + 1.245 + while (height--) 1.246 + { 1.247 + dst = dst_line; 1.248 + dst_line += dst_stride; 1.249 + src = src_line; 1.250 + src_line += src_stride; 1.251 + w = width; 1.252 + 1.253 + while (w--) 1.254 + { 1.255 + s = *src++; 1.256 + 1.257 + if (s == 0) 1.258 + *dst = 0; 1.259 + else if (s != 0xff) 1.260 + *dst = MUL_UN8 (s, *dst, t); 1.261 + 1.262 + dst++; 1.263 + } 1.264 + } 1.265 +} 1.266 + 
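/* Illustrative sketch, not part of this patch: a scalar model of what the
 * in() and over() helpers above compute for one 8-bit channel of a
 * premultiplied pixel, using exact division by 255 in place of the
 * MUL_UN8/UN8x4_* rounding macros from pixman-combine32.h.  The names
 * below are hypothetical and exist only for this example.
 */
#if 0
static uint8_t
model_in_channel (uint8_t c, uint8_t m)
{
    /* IN: scale the channel by the mask alpha */
    return (uint8_t) ((c * m) / 255);
}

static uint8_t
model_over_channel (uint8_t s, uint8_t d, uint8_t src_alpha)
{
    /* OVER (premultiplied): src + dst * (1 - alpha(src)) */
    return (uint8_t) (s + (d * (255 - src_alpha)) / 255);
}
#endif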
1.267 +static void 1.268 +fast_composite_over_n_8_8888 (pixman_implementation_t *imp, 1.269 + pixman_composite_info_t *info) 1.270 +{ 1.271 + PIXMAN_COMPOSITE_ARGS (info); 1.272 + uint32_t src, srca; 1.273 + uint32_t *dst_line, *dst, d; 1.274 + uint8_t *mask_line, *mask, m; 1.275 + int dst_stride, mask_stride; 1.276 + int32_t w; 1.277 + 1.278 + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); 1.279 + 1.280 + srca = src >> 24; 1.281 + if (src == 0) 1.282 + return; 1.283 + 1.284 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); 1.285 + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); 1.286 + 1.287 + while (height--) 1.288 + { 1.289 + dst = dst_line; 1.290 + dst_line += dst_stride; 1.291 + mask = mask_line; 1.292 + mask_line += mask_stride; 1.293 + w = width; 1.294 + 1.295 + while (w--) 1.296 + { 1.297 + m = *mask++; 1.298 + if (m == 0xff) 1.299 + { 1.300 + if (srca == 0xff) 1.301 + *dst = src; 1.302 + else 1.303 + *dst = over (src, *dst); 1.304 + } 1.305 + else if (m) 1.306 + { 1.307 + d = in (src, m); 1.308 + *dst = over (d, *dst); 1.309 + } 1.310 + dst++; 1.311 + } 1.312 + } 1.313 +} 1.314 + 1.315 +static void 1.316 +fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, 1.317 + pixman_composite_info_t *info) 1.318 +{ 1.319 + PIXMAN_COMPOSITE_ARGS (info); 1.320 + uint32_t src, s; 1.321 + uint32_t *dst_line, *dst, d; 1.322 + uint32_t *mask_line, *mask, ma; 1.323 + int dst_stride, mask_stride; 1.324 + int32_t w; 1.325 + 1.326 + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); 1.327 + 1.328 + if (src == 0) 1.329 + return; 1.330 + 1.331 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); 1.332 + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); 1.333 + 1.334 + while (height--) 1.335 + { 1.336 + dst = dst_line; 1.337 + dst_line += dst_stride; 1.338 + mask = mask_line; 1.339 + mask_line += mask_stride; 1.340 + w = width; 1.341 + 1.342 + while (w--) 1.343 + { 1.344 + ma = *mask++; 1.345 + 1.346 + if (ma) 1.347 + { 1.348 + d = *dst; 1.349 + s = src; 1.350 + 1.351 + UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d); 1.352 + 1.353 + *dst = s; 1.354 + } 1.355 + 1.356 + dst++; 1.357 + } 1.358 + } 1.359 +} 1.360 + 1.361 +static void 1.362 +fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, 1.363 + pixman_composite_info_t *info) 1.364 +{ 1.365 + PIXMAN_COMPOSITE_ARGS (info); 1.366 + uint32_t src, srca, s; 1.367 + uint32_t *dst_line, *dst, d; 1.368 + uint32_t *mask_line, *mask, ma; 1.369 + int dst_stride, mask_stride; 1.370 + int32_t w; 1.371 + 1.372 + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); 1.373 + 1.374 + srca = src >> 24; 1.375 + if (src == 0) 1.376 + return; 1.377 + 1.378 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); 1.379 + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); 1.380 + 1.381 + while (height--) 1.382 + { 1.383 + dst = dst_line; 1.384 + dst_line += dst_stride; 1.385 + mask = mask_line; 1.386 + mask_line += mask_stride; 1.387 + w = width; 1.388 + 1.389 + while (w--) 1.390 + { 1.391 + ma = *mask++; 1.392 + if (ma == 0xffffffff) 1.393 + { 1.394 + if (srca == 0xff) 1.395 + *dst = src; 1.396 + else 1.397 + *dst = over (src, *dst); 1.398 + } 1.399 + else if (ma) 1.400 + { 1.401 + d = *dst; 1.402 + s = src; 1.403 + 1.404 + UN8x4_MUL_UN8x4 (s, ma); 1.405 + UN8x4_MUL_UN8 (ma, 
srca); 1.406 + ma = ~ma; 1.407 + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); 1.408 + 1.409 + *dst = d; 1.410 + } 1.411 + 1.412 + dst++; 1.413 + } 1.414 + } 1.415 +} 1.416 + 1.417 +static void 1.418 +fast_composite_over_n_8_0888 (pixman_implementation_t *imp, 1.419 + pixman_composite_info_t *info) 1.420 +{ 1.421 + PIXMAN_COMPOSITE_ARGS (info); 1.422 + uint32_t src, srca; 1.423 + uint8_t *dst_line, *dst; 1.424 + uint32_t d; 1.425 + uint8_t *mask_line, *mask, m; 1.426 + int dst_stride, mask_stride; 1.427 + int32_t w; 1.428 + 1.429 + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); 1.430 + 1.431 + srca = src >> 24; 1.432 + if (src == 0) 1.433 + return; 1.434 + 1.435 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); 1.436 + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); 1.437 + 1.438 + while (height--) 1.439 + { 1.440 + dst = dst_line; 1.441 + dst_line += dst_stride; 1.442 + mask = mask_line; 1.443 + mask_line += mask_stride; 1.444 + w = width; 1.445 + 1.446 + while (w--) 1.447 + { 1.448 + m = *mask++; 1.449 + if (m == 0xff) 1.450 + { 1.451 + if (srca == 0xff) 1.452 + { 1.453 + d = src; 1.454 + } 1.455 + else 1.456 + { 1.457 + d = fetch_24 (dst); 1.458 + d = over (src, d); 1.459 + } 1.460 + store_24 (dst, d); 1.461 + } 1.462 + else if (m) 1.463 + { 1.464 + d = over (in (src, m), fetch_24 (dst)); 1.465 + store_24 (dst, d); 1.466 + } 1.467 + dst += 3; 1.468 + } 1.469 + } 1.470 +} 1.471 + 1.472 +static void 1.473 +fast_composite_over_n_8_0565 (pixman_implementation_t *imp, 1.474 + pixman_composite_info_t *info) 1.475 +{ 1.476 + PIXMAN_COMPOSITE_ARGS (info); 1.477 + uint32_t src, srca; 1.478 + uint16_t *dst_line, *dst; 1.479 + uint32_t d; 1.480 + uint8_t *mask_line, *mask, m; 1.481 + int dst_stride, mask_stride; 1.482 + int32_t w; 1.483 + 1.484 + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); 1.485 + 1.486 + srca = src >> 24; 1.487 + if (src == 0) 1.488 + return; 1.489 + 1.490 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); 1.491 + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); 1.492 + 1.493 + while (height--) 1.494 + { 1.495 + dst = dst_line; 1.496 + dst_line += dst_stride; 1.497 + mask = mask_line; 1.498 + mask_line += mask_stride; 1.499 + w = width; 1.500 + 1.501 + while (w--) 1.502 + { 1.503 + m = *mask++; 1.504 + if (m == 0xff) 1.505 + { 1.506 + if (srca == 0xff) 1.507 + { 1.508 + d = src; 1.509 + } 1.510 + else 1.511 + { 1.512 + d = *dst; 1.513 + d = over (src, convert_0565_to_0888 (d)); 1.514 + } 1.515 + *dst = convert_8888_to_0565 (d); 1.516 + } 1.517 + else if (m) 1.518 + { 1.519 + d = *dst; 1.520 + d = over (in (src, m), convert_0565_to_0888 (d)); 1.521 + *dst = convert_8888_to_0565 (d); 1.522 + } 1.523 + dst++; 1.524 + } 1.525 + } 1.526 +} 1.527 + 1.528 +static void 1.529 +fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, 1.530 + pixman_composite_info_t *info) 1.531 +{ 1.532 + PIXMAN_COMPOSITE_ARGS (info); 1.533 + uint32_t src, srca, s; 1.534 + uint16_t src16; 1.535 + uint16_t *dst_line, *dst; 1.536 + uint32_t d; 1.537 + uint32_t *mask_line, *mask, ma; 1.538 + int dst_stride, mask_stride; 1.539 + int32_t w; 1.540 + 1.541 + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); 1.542 + 1.543 + srca = src >> 24; 1.544 + if (src == 0) 1.545 + return; 1.546 + 1.547 + src16 = convert_8888_to_0565 (src); 1.548 + 1.549 + PIXMAN_IMAGE_GET_LINE (dest_image, 
dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); 1.550 + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); 1.551 + 1.552 + while (height--) 1.553 + { 1.554 + dst = dst_line; 1.555 + dst_line += dst_stride; 1.556 + mask = mask_line; 1.557 + mask_line += mask_stride; 1.558 + w = width; 1.559 + 1.560 + while (w--) 1.561 + { 1.562 + ma = *mask++; 1.563 + if (ma == 0xffffffff) 1.564 + { 1.565 + if (srca == 0xff) 1.566 + { 1.567 + *dst = src16; 1.568 + } 1.569 + else 1.570 + { 1.571 + d = *dst; 1.572 + d = over (src, convert_0565_to_0888 (d)); 1.573 + *dst = convert_8888_to_0565 (d); 1.574 + } 1.575 + } 1.576 + else if (ma) 1.577 + { 1.578 + d = *dst; 1.579 + d = convert_0565_to_0888 (d); 1.580 + 1.581 + s = src; 1.582 + 1.583 + UN8x4_MUL_UN8x4 (s, ma); 1.584 + UN8x4_MUL_UN8 (ma, srca); 1.585 + ma = ~ma; 1.586 + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); 1.587 + 1.588 + *dst = convert_8888_to_0565 (d); 1.589 + } 1.590 + dst++; 1.591 + } 1.592 + } 1.593 +} 1.594 + 1.595 +static void 1.596 +fast_composite_over_8888_8888 (pixman_implementation_t *imp, 1.597 + pixman_composite_info_t *info) 1.598 +{ 1.599 + PIXMAN_COMPOSITE_ARGS (info); 1.600 + uint32_t *dst_line, *dst; 1.601 + uint32_t *src_line, *src, s; 1.602 + int dst_stride, src_stride; 1.603 + uint8_t a; 1.604 + int32_t w; 1.605 + 1.606 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); 1.607 + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); 1.608 + 1.609 + while (height--) 1.610 + { 1.611 + dst = dst_line; 1.612 + dst_line += dst_stride; 1.613 + src = src_line; 1.614 + src_line += src_stride; 1.615 + w = width; 1.616 + 1.617 + while (w--) 1.618 + { 1.619 + s = *src++; 1.620 + a = s >> 24; 1.621 + if (a == 0xff) 1.622 + *dst = s; 1.623 + else if (s) 1.624 + *dst = over (s, *dst); 1.625 + dst++; 1.626 + } 1.627 + } 1.628 +} 1.629 + 1.630 +static void 1.631 +fast_composite_src_x888_8888 (pixman_implementation_t *imp, 1.632 + pixman_composite_info_t *info) 1.633 +{ 1.634 + PIXMAN_COMPOSITE_ARGS (info); 1.635 + uint32_t *dst_line, *dst; 1.636 + uint32_t *src_line, *src; 1.637 + int dst_stride, src_stride; 1.638 + int32_t w; 1.639 + 1.640 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); 1.641 + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); 1.642 + 1.643 + while (height--) 1.644 + { 1.645 + dst = dst_line; 1.646 + dst_line += dst_stride; 1.647 + src = src_line; 1.648 + src_line += src_stride; 1.649 + w = width; 1.650 + 1.651 + while (w--) 1.652 + *dst++ = (*src++) | 0xff000000; 1.653 + } 1.654 +} 1.655 + 1.656 +#if 0 1.657 +static void 1.658 +fast_composite_over_8888_0888 (pixman_implementation_t *imp, 1.659 + pixman_composite_info_t *info) 1.660 +{ 1.661 + PIXMAN_COMPOSITE_ARGS (info); 1.662 + uint8_t *dst_line, *dst; 1.663 + uint32_t d; 1.664 + uint32_t *src_line, *src, s; 1.665 + uint8_t a; 1.666 + int dst_stride, src_stride; 1.667 + int32_t w; 1.668 + 1.669 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); 1.670 + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); 1.671 + 1.672 + while (height--) 1.673 + { 1.674 + dst = dst_line; 1.675 + dst_line += dst_stride; 1.676 + src = src_line; 1.677 + src_line += src_stride; 1.678 + w = width; 1.679 + 1.680 + while (w--) 1.681 + { 1.682 + s = *src++; 1.683 + a = s >> 24; 1.684 + if (a) 1.685 + { 1.686 + if (a == 0xff) 1.687 + d = s; 1.688 + else 
1.689 + d = over (s, fetch_24 (dst)); 1.690 + 1.691 + store_24 (dst, d); 1.692 + } 1.693 + dst += 3; 1.694 + } 1.695 + } 1.696 +} 1.697 +#endif 1.698 + 1.699 +static void 1.700 +fast_composite_over_8888_0565 (pixman_implementation_t *imp, 1.701 + pixman_composite_info_t *info) 1.702 +{ 1.703 + PIXMAN_COMPOSITE_ARGS (info); 1.704 + uint16_t *dst_line, *dst; 1.705 + uint32_t d; 1.706 + uint32_t *src_line, *src, s; 1.707 + uint8_t a; 1.708 + int dst_stride, src_stride; 1.709 + int32_t w; 1.710 + 1.711 + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); 1.712 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); 1.713 + 1.714 + while (height--) 1.715 + { 1.716 + dst = dst_line; 1.717 + dst_line += dst_stride; 1.718 + src = src_line; 1.719 + src_line += src_stride; 1.720 + w = width; 1.721 + 1.722 + while (w--) 1.723 + { 1.724 + s = *src++; 1.725 + a = s >> 24; 1.726 + if (s) 1.727 + { 1.728 + if (a == 0xff) 1.729 + { 1.730 + d = s; 1.731 + } 1.732 + else 1.733 + { 1.734 + d = *dst; 1.735 + d = over (s, convert_0565_to_0888 (d)); 1.736 + } 1.737 + *dst = convert_8888_to_0565 (d); 1.738 + } 1.739 + dst++; 1.740 + } 1.741 + } 1.742 +} 1.743 + 1.744 +static void 1.745 +fast_composite_add_8_8 (pixman_implementation_t *imp, 1.746 + pixman_composite_info_t *info) 1.747 +{ 1.748 + PIXMAN_COMPOSITE_ARGS (info); 1.749 + uint8_t *dst_line, *dst; 1.750 + uint8_t *src_line, *src; 1.751 + int dst_stride, src_stride; 1.752 + int32_t w; 1.753 + uint8_t s, d; 1.754 + uint16_t t; 1.755 + 1.756 + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); 1.757 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); 1.758 + 1.759 + while (height--) 1.760 + { 1.761 + dst = dst_line; 1.762 + dst_line += dst_stride; 1.763 + src = src_line; 1.764 + src_line += src_stride; 1.765 + w = width; 1.766 + 1.767 + while (w--) 1.768 + { 1.769 + s = *src++; 1.770 + if (s) 1.771 + { 1.772 + if (s != 0xff) 1.773 + { 1.774 + d = *dst; 1.775 + t = d + s; 1.776 + s = t | (0 - (t >> 8)); 1.777 + } 1.778 + *dst = s; 1.779 + } 1.780 + dst++; 1.781 + } 1.782 + } 1.783 +} 1.784 + 1.785 +static void 1.786 +fast_composite_add_0565_0565 (pixman_implementation_t *imp, 1.787 + pixman_composite_info_t *info) 1.788 +{ 1.789 + PIXMAN_COMPOSITE_ARGS (info); 1.790 + uint16_t *dst_line, *dst; 1.791 + uint32_t d; 1.792 + uint16_t *src_line, *src; 1.793 + uint32_t s; 1.794 + int dst_stride, src_stride; 1.795 + int32_t w; 1.796 + 1.797 + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1); 1.798 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); 1.799 + 1.800 + while (height--) 1.801 + { 1.802 + dst = dst_line; 1.803 + dst_line += dst_stride; 1.804 + src = src_line; 1.805 + src_line += src_stride; 1.806 + w = width; 1.807 + 1.808 + while (w--) 1.809 + { 1.810 + s = *src++; 1.811 + if (s) 1.812 + { 1.813 + d = *dst; 1.814 + s = convert_0565_to_8888 (s); 1.815 + if (d) 1.816 + { 1.817 + d = convert_0565_to_8888 (d); 1.818 + UN8x4_ADD_UN8x4 (s, d); 1.819 + } 1.820 + *dst = convert_8888_to_0565 (s); 1.821 + } 1.822 + dst++; 1.823 + } 1.824 + } 1.825 +} 1.826 + 1.827 +static void 1.828 +fast_composite_add_8888_8888 (pixman_implementation_t *imp, 1.829 + pixman_composite_info_t *info) 1.830 +{ 1.831 + PIXMAN_COMPOSITE_ARGS (info); 1.832 + uint32_t *dst_line, *dst; 1.833 + uint32_t *src_line, *src; 1.834 + int dst_stride, src_stride; 1.835 + int32_t w; 1.836 + 
uint32_t s, d; 1.837 + 1.838 + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); 1.839 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); 1.840 + 1.841 + while (height--) 1.842 + { 1.843 + dst = dst_line; 1.844 + dst_line += dst_stride; 1.845 + src = src_line; 1.846 + src_line += src_stride; 1.847 + w = width; 1.848 + 1.849 + while (w--) 1.850 + { 1.851 + s = *src++; 1.852 + if (s) 1.853 + { 1.854 + if (s != 0xffffffff) 1.855 + { 1.856 + d = *dst; 1.857 + if (d) 1.858 + UN8x4_ADD_UN8x4 (s, d); 1.859 + } 1.860 + *dst = s; 1.861 + } 1.862 + dst++; 1.863 + } 1.864 + } 1.865 +} 1.866 + 1.867 +static void 1.868 +fast_composite_add_n_8_8 (pixman_implementation_t *imp, 1.869 + pixman_composite_info_t *info) 1.870 +{ 1.871 + PIXMAN_COMPOSITE_ARGS (info); 1.872 + uint8_t *dst_line, *dst; 1.873 + uint8_t *mask_line, *mask; 1.874 + int dst_stride, mask_stride; 1.875 + int32_t w; 1.876 + uint32_t src; 1.877 + uint8_t sa; 1.878 + 1.879 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); 1.880 + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); 1.881 + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); 1.882 + sa = (src >> 24); 1.883 + 1.884 + while (height--) 1.885 + { 1.886 + dst = dst_line; 1.887 + dst_line += dst_stride; 1.888 + mask = mask_line; 1.889 + mask_line += mask_stride; 1.890 + w = width; 1.891 + 1.892 + while (w--) 1.893 + { 1.894 + uint16_t tmp; 1.895 + uint16_t a; 1.896 + uint32_t m, d; 1.897 + uint32_t r; 1.898 + 1.899 + a = *mask++; 1.900 + d = *dst; 1.901 + 1.902 + m = MUL_UN8 (sa, a, tmp); 1.903 + r = ADD_UN8 (m, d, tmp); 1.904 + 1.905 + *dst++ = r; 1.906 + } 1.907 + } 1.908 +} 1.909 + 1.910 +#ifdef WORDS_BIGENDIAN 1.911 +#define CREATE_BITMASK(n) (0x80000000 >> (n)) 1.912 +#define UPDATE_BITMASK(n) ((n) >> 1) 1.913 +#else 1.914 +#define CREATE_BITMASK(n) (1 << (n)) 1.915 +#define UPDATE_BITMASK(n) ((n) << 1) 1.916 +#endif 1.917 + 1.918 +#define TEST_BIT(p, n) \ 1.919 + (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31)) 1.920 +#define SET_BIT(p, n) \ 1.921 + do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0); 1.922 + 1.923 +static void 1.924 +fast_composite_add_1_1 (pixman_implementation_t *imp, 1.925 + pixman_composite_info_t *info) 1.926 +{ 1.927 + PIXMAN_COMPOSITE_ARGS (info); 1.928 + uint32_t *dst_line, *dst; 1.929 + uint32_t *src_line, *src; 1.930 + int dst_stride, src_stride; 1.931 + int32_t w; 1.932 + 1.933 + PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t, 1.934 + src_stride, src_line, 1); 1.935 + PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t, 1.936 + dst_stride, dst_line, 1); 1.937 + 1.938 + while (height--) 1.939 + { 1.940 + dst = dst_line; 1.941 + dst_line += dst_stride; 1.942 + src = src_line; 1.943 + src_line += src_stride; 1.944 + w = width; 1.945 + 1.946 + while (w--) 1.947 + { 1.948 + /* 1.949 + * TODO: improve performance by processing uint32_t data instead 1.950 + * of individual bits 1.951 + */ 1.952 + if (TEST_BIT (src, src_x + w)) 1.953 + SET_BIT (dst, dest_x + w); 1.954 + } 1.955 + } 1.956 +} 1.957 + 1.958 +static void 1.959 +fast_composite_over_n_1_8888 (pixman_implementation_t *imp, 1.960 + pixman_composite_info_t *info) 1.961 +{ 1.962 + PIXMAN_COMPOSITE_ARGS (info); 1.963 + uint32_t src, srca; 1.964 + uint32_t *dst, *dst_line; 1.965 + uint32_t *mask, *mask_line; 1.966 + int mask_stride, dst_stride; 1.967 + uint32_t bitcache, bitmask; 1.968 + int32_t w; 1.969 + 
1.970 + if (width <= 0) 1.971 + return; 1.972 + 1.973 + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); 1.974 + srca = src >> 24; 1.975 + if (src == 0) 1.976 + return; 1.977 + 1.978 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, 1.979 + dst_stride, dst_line, 1); 1.980 + PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t, 1.981 + mask_stride, mask_line, 1); 1.982 + mask_line += mask_x >> 5; 1.983 + 1.984 + if (srca == 0xff) 1.985 + { 1.986 + while (height--) 1.987 + { 1.988 + dst = dst_line; 1.989 + dst_line += dst_stride; 1.990 + mask = mask_line; 1.991 + mask_line += mask_stride; 1.992 + w = width; 1.993 + 1.994 + bitcache = *mask++; 1.995 + bitmask = CREATE_BITMASK (mask_x & 31); 1.996 + 1.997 + while (w--) 1.998 + { 1.999 + if (bitmask == 0) 1.1000 + { 1.1001 + bitcache = *mask++; 1.1002 + bitmask = CREATE_BITMASK (0); 1.1003 + } 1.1004 + if (bitcache & bitmask) 1.1005 + *dst = src; 1.1006 + bitmask = UPDATE_BITMASK (bitmask); 1.1007 + dst++; 1.1008 + } 1.1009 + } 1.1010 + } 1.1011 + else 1.1012 + { 1.1013 + while (height--) 1.1014 + { 1.1015 + dst = dst_line; 1.1016 + dst_line += dst_stride; 1.1017 + mask = mask_line; 1.1018 + mask_line += mask_stride; 1.1019 + w = width; 1.1020 + 1.1021 + bitcache = *mask++; 1.1022 + bitmask = CREATE_BITMASK (mask_x & 31); 1.1023 + 1.1024 + while (w--) 1.1025 + { 1.1026 + if (bitmask == 0) 1.1027 + { 1.1028 + bitcache = *mask++; 1.1029 + bitmask = CREATE_BITMASK (0); 1.1030 + } 1.1031 + if (bitcache & bitmask) 1.1032 + *dst = over (src, *dst); 1.1033 + bitmask = UPDATE_BITMASK (bitmask); 1.1034 + dst++; 1.1035 + } 1.1036 + } 1.1037 + } 1.1038 +} 1.1039 + 1.1040 +static void 1.1041 +fast_composite_over_n_1_0565 (pixman_implementation_t *imp, 1.1042 + pixman_composite_info_t *info) 1.1043 +{ 1.1044 + PIXMAN_COMPOSITE_ARGS (info); 1.1045 + uint32_t src, srca; 1.1046 + uint16_t *dst, *dst_line; 1.1047 + uint32_t *mask, *mask_line; 1.1048 + int mask_stride, dst_stride; 1.1049 + uint32_t bitcache, bitmask; 1.1050 + int32_t w; 1.1051 + uint32_t d; 1.1052 + uint16_t src565; 1.1053 + 1.1054 + if (width <= 0) 1.1055 + return; 1.1056 + 1.1057 + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); 1.1058 + srca = src >> 24; 1.1059 + if (src == 0) 1.1060 + return; 1.1061 + 1.1062 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, 1.1063 + dst_stride, dst_line, 1); 1.1064 + PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t, 1.1065 + mask_stride, mask_line, 1); 1.1066 + mask_line += mask_x >> 5; 1.1067 + 1.1068 + if (srca == 0xff) 1.1069 + { 1.1070 + src565 = convert_8888_to_0565 (src); 1.1071 + while (height--) 1.1072 + { 1.1073 + dst = dst_line; 1.1074 + dst_line += dst_stride; 1.1075 + mask = mask_line; 1.1076 + mask_line += mask_stride; 1.1077 + w = width; 1.1078 + 1.1079 + bitcache = *mask++; 1.1080 + bitmask = CREATE_BITMASK (mask_x & 31); 1.1081 + 1.1082 + while (w--) 1.1083 + { 1.1084 + if (bitmask == 0) 1.1085 + { 1.1086 + bitcache = *mask++; 1.1087 + bitmask = CREATE_BITMASK (0); 1.1088 + } 1.1089 + if (bitcache & bitmask) 1.1090 + *dst = src565; 1.1091 + bitmask = UPDATE_BITMASK (bitmask); 1.1092 + dst++; 1.1093 + } 1.1094 + } 1.1095 + } 1.1096 + else 1.1097 + { 1.1098 + while (height--) 1.1099 + { 1.1100 + dst = dst_line; 1.1101 + dst_line += dst_stride; 1.1102 + mask = mask_line; 1.1103 + mask_line += mask_stride; 1.1104 + w = width; 1.1105 + 1.1106 + bitcache = *mask++; 1.1107 + bitmask = CREATE_BITMASK (mask_x & 31); 1.1108 + 1.1109 + while (w--) 1.1110 + { 1.1111 + if 
(bitmask == 0) 1.1112 + { 1.1113 + bitcache = *mask++; 1.1114 + bitmask = CREATE_BITMASK (0); 1.1115 + } 1.1116 + if (bitcache & bitmask) 1.1117 + { 1.1118 + d = over (src, convert_0565_to_0888 (*dst)); 1.1119 + *dst = convert_8888_to_0565 (d); 1.1120 + } 1.1121 + bitmask = UPDATE_BITMASK (bitmask); 1.1122 + dst++; 1.1123 + } 1.1124 + } 1.1125 + } 1.1126 +} 1.1127 + 1.1128 +/* 1.1129 + * Simple bitblt 1.1130 + */ 1.1131 + 1.1132 +static void 1.1133 +fast_composite_solid_fill (pixman_implementation_t *imp, 1.1134 + pixman_composite_info_t *info) 1.1135 +{ 1.1136 + PIXMAN_COMPOSITE_ARGS (info); 1.1137 + uint32_t src; 1.1138 + 1.1139 + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); 1.1140 + 1.1141 + if (dest_image->bits.format == PIXMAN_a1) 1.1142 + { 1.1143 + src = src >> 31; 1.1144 + } 1.1145 + else if (dest_image->bits.format == PIXMAN_a8) 1.1146 + { 1.1147 + src = src >> 24; 1.1148 + } 1.1149 + else if (dest_image->bits.format == PIXMAN_r5g6b5 || 1.1150 + dest_image->bits.format == PIXMAN_b5g6r5) 1.1151 + { 1.1152 + src = convert_8888_to_0565 (src); 1.1153 + } 1.1154 + 1.1155 + pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 1.1156 + PIXMAN_FORMAT_BPP (dest_image->bits.format), 1.1157 + dest_x, dest_y, 1.1158 + width, height, 1.1159 + src); 1.1160 +} 1.1161 + 1.1162 +static void 1.1163 +fast_composite_src_memcpy (pixman_implementation_t *imp, 1.1164 + pixman_composite_info_t *info) 1.1165 +{ 1.1166 + PIXMAN_COMPOSITE_ARGS (info); 1.1167 + int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8; 1.1168 + uint32_t n_bytes = width * bpp; 1.1169 + int dst_stride, src_stride; 1.1170 + uint8_t *dst; 1.1171 + uint8_t *src; 1.1172 + 1.1173 + src_stride = src_image->bits.rowstride * 4; 1.1174 + dst_stride = dest_image->bits.rowstride * 4; 1.1175 + 1.1176 + src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp; 1.1177 + dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp; 1.1178 + 1.1179 + while (height--) 1.1180 + { 1.1181 + memcpy (dst, src, n_bytes); 1.1182 + 1.1183 + dst += dst_stride; 1.1184 + src += src_stride; 1.1185 + } 1.1186 +} 1.1187 + 1.1188 +FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER) 1.1189 +FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE) 1.1190 +FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD) 1.1191 +FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL) 1.1192 +FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER) 1.1193 +FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD) 1.1194 +FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL) 1.1195 +FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER) 1.1196 +FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE) 1.1197 +FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD) 1.1198 +FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL) 1.1199 +FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER) 1.1200 +FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE) 1.1201 +FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD) 1.1202 +FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL) 1.1203 +FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL) 1.1204 +FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, 
COVER) 1.1205 +FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE) 1.1206 +FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD) 1.1207 +FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL) 1.1208 + 1.1209 +static force_inline void 1.1210 +scaled_bilinear_scanline_8888_565_OVER (uint16_t * dst, 1.1211 + const uint32_t * mask, 1.1212 + const uint32_t * src_top, 1.1213 + const uint32_t * src_bottom, 1.1214 + int32_t w, 1.1215 + int wt, 1.1216 + int wb, 1.1217 + pixman_fixed_t vx, 1.1218 + pixman_fixed_t unit_x, 1.1219 + pixman_fixed_t max_vx, 1.1220 + pixman_bool_t zero_src) 1.1221 +{ 1.1222 + while ((w -= 1) >= 0) 1.1223 + { 1.1224 + uint32_t tl = src_top [pixman_fixed_to_int (vx)]; 1.1225 + uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1]; 1.1226 + uint32_t bl = src_bottom [pixman_fixed_to_int (vx)]; 1.1227 + uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1]; 1.1228 + uint32_t src, result; 1.1229 + uint16_t d; 1.1230 + d = *dst; 1.1231 + src = bilinear_interpolation (tl, tr, 1.1232 + bl, br, 1.1233 + pixman_fixed_to_bilinear_weight(vx), 1.1234 + wb); 1.1235 + vx += unit_x; 1.1236 + result = over (src, convert_0565_to_0888 (d)); 1.1237 + *dst++ = convert_8888_to_0565 (result); 1.1238 + } 1.1239 +} 1.1240 + 1.1241 +static force_inline void 1.1242 +scaled_bilinear_scanline_8888_8888_OVER (uint32_t * dst, 1.1243 + const uint32_t * mask, 1.1244 + const uint32_t * src_top, 1.1245 + const uint32_t * src_bottom, 1.1246 + int32_t w, 1.1247 + int wt, 1.1248 + int wb, 1.1249 + pixman_fixed_t vx, 1.1250 + pixman_fixed_t unit_x, 1.1251 + pixman_fixed_t max_vx, 1.1252 + pixman_bool_t zero_src) 1.1253 +{ 1.1254 + while ((w -= 1) >= 0) 1.1255 + { 1.1256 + uint32_t tl = src_top [pixman_fixed_to_int (vx)]; 1.1257 + uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1]; 1.1258 + uint32_t bl = src_bottom [pixman_fixed_to_int (vx)]; 1.1259 + uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1]; 1.1260 + uint32_t src; 1.1261 + uint32_t d; 1.1262 + uint32_t result; 1.1263 + d = *dst; 1.1264 + src = bilinear_interpolation (tl, tr, 1.1265 + bl, br, 1.1266 + pixman_fixed_to_bilinear_weight(vx), 1.1267 + wb); 1.1268 + vx += unit_x; 1.1269 + *dst++ = over (src, d); 1.1270 + } 1.1271 +} 1.1272 + 1.1273 +#ifndef LOWER_QUALITY_INTERPOLATION 1.1274 + 1.1275 +static force_inline void 1.1276 +scaled_bilinear_scanline_565_565_SRC (uint16_t * dst, 1.1277 + const uint32_t * mask, 1.1278 + const uint16_t * src_top, 1.1279 + const uint16_t * src_bottom, 1.1280 + int32_t w, 1.1281 + int wt, 1.1282 + int wb, 1.1283 + pixman_fixed_t vx, 1.1284 + pixman_fixed_t unit_x, 1.1285 + pixman_fixed_t max_vx, 1.1286 + pixman_bool_t zero_src) 1.1287 +{ 1.1288 + while ((w -= 1) >= 0) 1.1289 + { 1.1290 + uint16_t tl = src_top [pixman_fixed_to_int (vx)]; 1.1291 + uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1]; 1.1292 + uint16_t bl = src_bottom [pixman_fixed_to_int (vx)]; 1.1293 + uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1]; 1.1294 + uint32_t d; 1.1295 + d = bilinear_interpolation(convert_0565_to_8888 (tl), 1.1296 + convert_0565_to_8888 (tr), 1.1297 + convert_0565_to_8888 (bl), 1.1298 + convert_0565_to_8888 (br), 1.1299 + pixman_fixed_to_bilinear_weight (vx), 1.1300 + wb); 1.1301 + vx += unit_x; 1.1302 + *dst++ = convert_8888_to_0565 (d); 1.1303 + } 1.1304 +} 1.1305 + 1.1306 +#else 1.1307 + 1.1308 +/* This is a clever low resolution bilinear interpolation inspired by the code 1.1309 + in Skia */ 1.1310 + 1.1311 +/* This takes the green component from the 
565 representation and moves it: 1.1312 + 00000000 00000000 rrrrrggg gggbbbbb 1.1313 + 1.1314 + 00000ggg ggg00000 rrrrr000 000bbbbb 1.1315 + 1.1316 + This gives us 5 extra bits of space before each component to let us do 1.1317 + SWAR style optimizations 1.1318 +*/ 1.1319 + 1.1320 +#define GREEN_MASK (((1 << 6) - 1) << 5) 1.1321 + 1.1322 +static inline uint32_t 1.1323 +expand_rgb_565 (uint16_t c) { 1.1324 + return ((c & GREEN_MASK) << 16) | (c & ~GREEN_MASK); 1.1325 +} 1.1326 + 1.1327 +static inline uint16_t 1.1328 +compact_rgb_565 (uint32_t c) { 1.1329 + return ((c >> 16) & GREEN_MASK) | (c & ~GREEN_MASK); 1.1330 +} 1.1331 + 1.1332 +static inline uint16_t 1.1333 +bilinear_interpolation_565(uint16_t tl, uint16_t tr, 1.1334 + uint16_t bl, uint16_t br, 1.1335 + int x, int y) 1.1336 +{ 1.1337 + int xy; 1.1338 + uint32_t a00 = expand_rgb_565 (tl); 1.1339 + uint32_t a01 = expand_rgb_565 (tr); 1.1340 + uint32_t a10 = expand_rgb_565 (bl); 1.1341 + uint32_t a11 = expand_rgb_565 (br); 1.1342 + 1.1343 + xy = (x * y) >> 3; 1.1344 + return compact_rgb_565 ((a00 * (32 - 2*y - 2*x + xy) + 1.1345 + a01 * (2*x - xy) + 1.1346 + a10 * (2*y - xy) + 1.1347 + a11 * xy) >> 5); 1.1348 +} 1.1349 + 1.1350 +static force_inline void 1.1351 +scaled_bilinear_scanline_565_565_SRC (uint16_t * dst, 1.1352 + const uint32_t * mask, 1.1353 + const uint16_t * src_top, 1.1354 + const uint16_t * src_bottom, 1.1355 + int32_t w, 1.1356 + int wt, 1.1357 + int wb, 1.1358 + pixman_fixed_t vx, 1.1359 + pixman_fixed_t unit_x, 1.1360 + pixman_fixed_t max_vx, 1.1361 + pixman_bool_t zero_src) 1.1362 +{ 1.1363 + while ((w -= 1) >= 0) 1.1364 + { 1.1365 + uint16_t tl = src_top [pixman_fixed_to_int (vx)]; 1.1366 + uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1]; 1.1367 + uint16_t bl = src_bottom [pixman_fixed_to_int (vx)]; 1.1368 + uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1]; 1.1369 + 1.1370 + uint16_t d = bilinear_interpolation_565 (tl, tr, bl, br, 1.1371 + pixman_fixed_to_bilinear_weight(vx), 1.1372 + wb); 1.1373 + vx += unit_x; 1.1374 + *dst++ = d; 1.1375 + } 1.1376 +} 1.1377 + 1.1378 +#endif 1.1379 + 1.1380 +FAST_BILINEAR_MAINLOOP_COMMON (565_565_cover_SRC, 1.1381 + scaled_bilinear_scanline_565_565_SRC, NULL, 1.1382 + uint16_t, uint32_t, uint16_t, 1.1383 + COVER, FLAG_NONE) 1.1384 +FAST_BILINEAR_MAINLOOP_COMMON (565_565_pad_SRC, 1.1385 + scaled_bilinear_scanline_565_565_SRC, NULL, 1.1386 + uint16_t, uint32_t, uint16_t, 1.1387 + PAD, FLAG_NONE) 1.1388 +FAST_BILINEAR_MAINLOOP_COMMON (565_565_none_SRC, 1.1389 + scaled_bilinear_scanline_565_565_SRC, NULL, 1.1390 + uint16_t, uint32_t, uint16_t, 1.1391 + NONE, FLAG_NONE) 1.1392 +FAST_BILINEAR_MAINLOOP_COMMON (565_565_normal_SRC, 1.1393 + scaled_bilinear_scanline_565_565_SRC, NULL, 1.1394 + uint16_t, uint32_t, uint16_t, 1.1395 + NORMAL, FLAG_NONE) 1.1396 + 1.1397 +FAST_BILINEAR_MAINLOOP_COMMON (8888_565_cover_OVER, 1.1398 + scaled_bilinear_scanline_8888_565_OVER, NULL, 1.1399 + uint32_t, uint32_t, uint16_t, 1.1400 + COVER, FLAG_NONE) 1.1401 +FAST_BILINEAR_MAINLOOP_COMMON (8888_565_pad_OVER, 1.1402 + scaled_bilinear_scanline_8888_565_OVER, NULL, 1.1403 + uint32_t, uint32_t, uint16_t, 1.1404 + PAD, FLAG_NONE) 1.1405 +FAST_BILINEAR_MAINLOOP_COMMON (8888_565_none_OVER, 1.1406 + scaled_bilinear_scanline_8888_565_OVER, NULL, 1.1407 + uint32_t, uint32_t, uint16_t, 1.1408 + NONE, FLAG_NONE) 1.1409 +FAST_BILINEAR_MAINLOOP_COMMON (8888_565_normal_OVER, 1.1410 + scaled_bilinear_scanline_8888_565_OVER, NULL, 1.1411 + uint32_t, uint32_t, uint16_t, 1.1412 + NORMAL, FLAG_NONE) 1.1413 + 
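/* Illustrative sketch, not part of this patch: a worked example of the SWAR
 * expansion used by the LOWER_QUALITY_INTERPOLATION variant of
 * scaled_bilinear_scanline_565_565_SRC above.  For white, c = 0xffff and
 * GREEN_MASK = 0x07e0:
 *
 *   expand_rgb_565 (0xffff) = ((0xffff & 0x07e0) << 16) | (0xffff & ~0x07e0)
 *                           = 0x07e0f81f
 *                           = 00000ggg ggg00000 rrrrr000 000bbbbb
 *
 * Each field then has at least five zero bits above it, so a weighted sum of
 * four expanded pixels with weights totalling 32 (as computed by
 * bilinear_interpolation_565) cannot carry from one field into the next
 * before the final >> 5, and compact_rgb_565 can pack the result straight
 * back into r5g6b5.
 */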
1.1414 +FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_cover_OVER, 1.1415 + scaled_bilinear_scanline_8888_8888_OVER, NULL, 1.1416 + uint32_t, uint32_t, uint32_t, 1.1417 + COVER, FLAG_NONE) 1.1418 +FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_pad_OVER, 1.1419 + scaled_bilinear_scanline_8888_8888_OVER, NULL, 1.1420 + uint32_t, uint32_t, uint32_t, 1.1421 + PAD, FLAG_NONE) 1.1422 +FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_none_OVER, 1.1423 + scaled_bilinear_scanline_8888_8888_OVER, NULL, 1.1424 + uint32_t, uint32_t, uint32_t, 1.1425 + NONE, FLAG_NONE) 1.1426 +FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_normal_OVER, 1.1427 + scaled_bilinear_scanline_8888_8888_OVER, NULL, 1.1428 + uint32_t, uint32_t, uint32_t, 1.1429 + NORMAL, FLAG_NONE) 1.1430 + 1.1431 +#define REPEAT_MIN_WIDTH 32 1.1432 + 1.1433 +static void 1.1434 +fast_composite_tiled_repeat (pixman_implementation_t *imp, 1.1435 + pixman_composite_info_t *info) 1.1436 +{ 1.1437 + PIXMAN_COMPOSITE_ARGS (info); 1.1438 + pixman_composite_func_t func; 1.1439 + pixman_format_code_t mask_format; 1.1440 + uint32_t src_flags, mask_flags; 1.1441 + int32_t sx, sy; 1.1442 + int32_t width_remain; 1.1443 + int32_t num_pixels; 1.1444 + int32_t src_width; 1.1445 + int32_t i, j; 1.1446 + pixman_image_t extended_src_image; 1.1447 + uint32_t extended_src[REPEAT_MIN_WIDTH * 2]; 1.1448 + pixman_bool_t need_src_extension; 1.1449 + uint32_t *src_line; 1.1450 + int32_t src_stride; 1.1451 + int32_t src_bpp; 1.1452 + pixman_composite_info_t info2 = *info; 1.1453 + 1.1454 + src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) | 1.1455 + FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; 1.1456 + 1.1457 + if (mask_image) 1.1458 + { 1.1459 + mask_format = mask_image->common.extended_format_code; 1.1460 + mask_flags = info->mask_flags; 1.1461 + } 1.1462 + else 1.1463 + { 1.1464 + mask_format = PIXMAN_null; 1.1465 + mask_flags = FAST_PATH_IS_OPAQUE; 1.1466 + } 1.1467 + 1.1468 + _pixman_implementation_lookup_composite ( 1.1469 + imp->toplevel, info->op, 1.1470 + src_image->common.extended_format_code, src_flags, 1.1471 + mask_format, mask_flags, 1.1472 + dest_image->common.extended_format_code, info->dest_flags, 1.1473 + &imp, &func); 1.1474 + 1.1475 + src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format); 1.1476 + 1.1477 + if (src_image->bits.width < REPEAT_MIN_WIDTH && 1.1478 + (src_bpp == 32 || src_bpp == 16 || src_bpp == 8) && 1.1479 + !src_image->bits.indexed) 1.1480 + { 1.1481 + sx = src_x; 1.1482 + sx = MOD (sx, src_image->bits.width); 1.1483 + sx += width; 1.1484 + src_width = 0; 1.1485 + 1.1486 + while (src_width < REPEAT_MIN_WIDTH && src_width <= sx) 1.1487 + src_width += src_image->bits.width; 1.1488 + 1.1489 + src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t); 1.1490 + 1.1491 + /* Initialize/validate stack-allocated temporary image */ 1.1492 + _pixman_bits_image_init (&extended_src_image, src_image->bits.format, 1.1493 + src_width, 1, &extended_src[0], src_stride, 1.1494 + FALSE); 1.1495 + _pixman_image_validate (&extended_src_image); 1.1496 + 1.1497 + info2.src_image = &extended_src_image; 1.1498 + need_src_extension = TRUE; 1.1499 + } 1.1500 + else 1.1501 + { 1.1502 + src_width = src_image->bits.width; 1.1503 + need_src_extension = FALSE; 1.1504 + } 1.1505 + 1.1506 + sx = src_x; 1.1507 + sy = src_y; 1.1508 + 1.1509 + while (--height >= 0) 1.1510 + { 1.1511 + sx = MOD (sx, src_width); 1.1512 + sy = MOD (sy, src_image->bits.height); 1.1513 + 1.1514 + if (need_src_extension) 1.1515 + { 1.1516 + if (src_bpp == 32) 1.1517 + { 1.1518 + PIXMAN_IMAGE_GET_LINE 
(src_image, 0, sy, uint32_t, src_stride, src_line, 1); 1.1519 + 1.1520 + for (i = 0; i < src_width; ) 1.1521 + { 1.1522 + for (j = 0; j < src_image->bits.width; j++, i++) 1.1523 + extended_src[i] = src_line[j]; 1.1524 + } 1.1525 + } 1.1526 + else if (src_bpp == 16) 1.1527 + { 1.1528 + uint16_t *src_line_16; 1.1529 + 1.1530 + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride, 1.1531 + src_line_16, 1); 1.1532 + src_line = (uint32_t*)src_line_16; 1.1533 + 1.1534 + for (i = 0; i < src_width; ) 1.1535 + { 1.1536 + for (j = 0; j < src_image->bits.width; j++, i++) 1.1537 + ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j]; 1.1538 + } 1.1539 + } 1.1540 + else if (src_bpp == 8) 1.1541 + { 1.1542 + uint8_t *src_line_8; 1.1543 + 1.1544 + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride, 1.1545 + src_line_8, 1); 1.1546 + src_line = (uint32_t*)src_line_8; 1.1547 + 1.1548 + for (i = 0; i < src_width; ) 1.1549 + { 1.1550 + for (j = 0; j < src_image->bits.width; j++, i++) 1.1551 + ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j]; 1.1552 + } 1.1553 + } 1.1554 + 1.1555 + info2.src_y = 0; 1.1556 + } 1.1557 + else 1.1558 + { 1.1559 + info2.src_y = sy; 1.1560 + } 1.1561 + 1.1562 + width_remain = width; 1.1563 + 1.1564 + while (width_remain > 0) 1.1565 + { 1.1566 + num_pixels = src_width - sx; 1.1567 + 1.1568 + if (num_pixels > width_remain) 1.1569 + num_pixels = width_remain; 1.1570 + 1.1571 + info2.src_x = sx; 1.1572 + info2.width = num_pixels; 1.1573 + info2.height = 1; 1.1574 + 1.1575 + func (imp, &info2); 1.1576 + 1.1577 + width_remain -= num_pixels; 1.1578 + info2.mask_x += num_pixels; 1.1579 + info2.dest_x += num_pixels; 1.1580 + sx = 0; 1.1581 + } 1.1582 + 1.1583 + sx = src_x; 1.1584 + sy++; 1.1585 + info2.mask_x = info->mask_x; 1.1586 + info2.mask_y++; 1.1587 + info2.dest_x = info->dest_x; 1.1588 + info2.dest_y++; 1.1589 + } 1.1590 + 1.1591 + if (need_src_extension) 1.1592 + _pixman_image_fini (&extended_src_image); 1.1593 +} 1.1594 + 1.1595 +/* Use more unrolling for src_0565_0565 because it is typically CPU bound */ 1.1596 +static force_inline void 1.1597 +scaled_nearest_scanline_565_565_SRC (uint16_t * dst, 1.1598 + const uint16_t * src, 1.1599 + int32_t w, 1.1600 + pixman_fixed_t vx, 1.1601 + pixman_fixed_t unit_x, 1.1602 + pixman_fixed_t max_vx, 1.1603 + pixman_bool_t fully_transparent_src) 1.1604 +{ 1.1605 + uint16_t tmp1, tmp2, tmp3, tmp4; 1.1606 + while ((w -= 4) >= 0) 1.1607 + { 1.1608 + tmp1 = *(src + pixman_fixed_to_int (vx)); 1.1609 + vx += unit_x; 1.1610 + tmp2 = *(src + pixman_fixed_to_int (vx)); 1.1611 + vx += unit_x; 1.1612 + tmp3 = *(src + pixman_fixed_to_int (vx)); 1.1613 + vx += unit_x; 1.1614 + tmp4 = *(src + pixman_fixed_to_int (vx)); 1.1615 + vx += unit_x; 1.1616 + *dst++ = tmp1; 1.1617 + *dst++ = tmp2; 1.1618 + *dst++ = tmp3; 1.1619 + *dst++ = tmp4; 1.1620 + } 1.1621 + if (w & 2) 1.1622 + { 1.1623 + tmp1 = *(src + pixman_fixed_to_int (vx)); 1.1624 + vx += unit_x; 1.1625 + tmp2 = *(src + pixman_fixed_to_int (vx)); 1.1626 + vx += unit_x; 1.1627 + *dst++ = tmp1; 1.1628 + *dst++ = tmp2; 1.1629 + } 1.1630 + if (w & 1) 1.1631 + *dst = *(src + pixman_fixed_to_int (vx)); 1.1632 +} 1.1633 + 1.1634 +FAST_NEAREST_MAINLOOP (565_565_cover_SRC, 1.1635 + scaled_nearest_scanline_565_565_SRC, 1.1636 + uint16_t, uint16_t, COVER) 1.1637 +FAST_NEAREST_MAINLOOP (565_565_none_SRC, 1.1638 + scaled_nearest_scanline_565_565_SRC, 1.1639 + uint16_t, uint16_t, NONE) 1.1640 +FAST_NEAREST_MAINLOOP (565_565_pad_SRC, 1.1641 + scaled_nearest_scanline_565_565_SRC, 1.1642 + 
uint16_t, uint16_t, PAD) 1.1643 + 1.1644 +static force_inline uint32_t 1.1645 +fetch_nearest (pixman_repeat_t src_repeat, 1.1646 + pixman_format_code_t format, 1.1647 + uint32_t *src, int x, int src_width) 1.1648 +{ 1.1649 + if (repeat (src_repeat, &x, src_width)) 1.1650 + { 1.1651 + if (format == PIXMAN_x8r8g8b8 || format == PIXMAN_x8b8g8r8) 1.1652 + return *(src + x) | 0xff000000; 1.1653 + else 1.1654 + return *(src + x); 1.1655 + } 1.1656 + else 1.1657 + { 1.1658 + return 0; 1.1659 + } 1.1660 +} 1.1661 + 1.1662 +static force_inline void 1.1663 +combine_over (uint32_t s, uint32_t *dst) 1.1664 +{ 1.1665 + if (s) 1.1666 + { 1.1667 + uint8_t ia = 0xff - (s >> 24); 1.1668 + 1.1669 + if (ia) 1.1670 + UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s); 1.1671 + else 1.1672 + *dst = s; 1.1673 + } 1.1674 +} 1.1675 + 1.1676 +static force_inline void 1.1677 +combine_src (uint32_t s, uint32_t *dst) 1.1678 +{ 1.1679 + *dst = s; 1.1680 +} 1.1681 + 1.1682 +static void 1.1683 +fast_composite_scaled_nearest (pixman_implementation_t *imp, 1.1684 + pixman_composite_info_t *info) 1.1685 +{ 1.1686 + PIXMAN_COMPOSITE_ARGS (info); 1.1687 + uint32_t *dst_line; 1.1688 + uint32_t *src_line; 1.1689 + int dst_stride, src_stride; 1.1690 + int src_width, src_height; 1.1691 + pixman_repeat_t src_repeat; 1.1692 + pixman_fixed_t unit_x, unit_y; 1.1693 + pixman_format_code_t src_format; 1.1694 + pixman_vector_t v; 1.1695 + pixman_fixed_t vy; 1.1696 + 1.1697 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); 1.1698 + /* pass in 0 instead of src_x and src_y because src_x and src_y need to be 1.1699 + * transformed from destination space to source space 1.1700 + */ 1.1701 + PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1); 1.1702 + 1.1703 + /* reference point is the center of the pixel */ 1.1704 + v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; 1.1705 + v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; 1.1706 + v.vector[2] = pixman_fixed_1; 1.1707 + 1.1708 + if (!pixman_transform_point_3d (src_image->common.transform, &v)) 1.1709 + return; 1.1710 + 1.1711 + unit_x = src_image->common.transform->matrix[0][0]; 1.1712 + unit_y = src_image->common.transform->matrix[1][1]; 1.1713 + 1.1714 + /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ 1.1715 + v.vector[0] -= pixman_fixed_e; 1.1716 + v.vector[1] -= pixman_fixed_e; 1.1717 + 1.1718 + src_height = src_image->bits.height; 1.1719 + src_width = src_image->bits.width; 1.1720 + src_repeat = src_image->common.repeat; 1.1721 + src_format = src_image->bits.format; 1.1722 + 1.1723 + vy = v.vector[1]; 1.1724 + while (height--) 1.1725 + { 1.1726 + pixman_fixed_t vx = v.vector[0]; 1.1727 + int y = pixman_fixed_to_int (vy); 1.1728 + uint32_t *dst = dst_line; 1.1729 + 1.1730 + dst_line += dst_stride; 1.1731 + 1.1732 + /* adjust the y location by a unit vector in the y direction 1.1733 + * this is equivalent to transforming y+1 of the destination point to source space */ 1.1734 + vy += unit_y; 1.1735 + 1.1736 + if (!repeat (src_repeat, &y, src_height)) 1.1737 + { 1.1738 + if (op == PIXMAN_OP_SRC) 1.1739 + memset (dst, 0, sizeof (*dst) * width); 1.1740 + } 1.1741 + else 1.1742 + { 1.1743 + int w = width; 1.1744 + 1.1745 + uint32_t *src = src_line + y * src_stride; 1.1746 + 1.1747 + while (w >= 2) 1.1748 + { 1.1749 + uint32_t s1, s2; 1.1750 + int x1, x2; 1.1751 + 1.1752 + x1 = pixman_fixed_to_int (vx); 1.1753 + vx += unit_x; 1.1754 + 1.1755 + x2 = pixman_fixed_to_int (vx); 1.1756 + vx += 
unit_x; 1.1757 + 1.1758 + w -= 2; 1.1759 + 1.1760 + s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width); 1.1761 + s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width); 1.1762 + 1.1763 + if (op == PIXMAN_OP_OVER) 1.1764 + { 1.1765 + combine_over (s1, dst++); 1.1766 + combine_over (s2, dst++); 1.1767 + } 1.1768 + else 1.1769 + { 1.1770 + combine_src (s1, dst++); 1.1771 + combine_src (s2, dst++); 1.1772 + } 1.1773 + } 1.1774 + 1.1775 + while (w--) 1.1776 + { 1.1777 + uint32_t s; 1.1778 + int x; 1.1779 + 1.1780 + x = pixman_fixed_to_int (vx); 1.1781 + vx += unit_x; 1.1782 + 1.1783 + s = fetch_nearest (src_repeat, src_format, src, x, src_width); 1.1784 + 1.1785 + if (op == PIXMAN_OP_OVER) 1.1786 + combine_over (s, dst++); 1.1787 + else 1.1788 + combine_src (s, dst++); 1.1789 + } 1.1790 + } 1.1791 + } 1.1792 +} 1.1793 + 1.1794 +#define CACHE_LINE_SIZE 64 1.1795 + 1.1796 +#define FAST_SIMPLE_ROTATE(suffix, pix_type) \ 1.1797 + \ 1.1798 +static void \ 1.1799 +blt_rotated_90_trivial_##suffix (pix_type *dst, \ 1.1800 + int dst_stride, \ 1.1801 + const pix_type *src, \ 1.1802 + int src_stride, \ 1.1803 + int w, \ 1.1804 + int h) \ 1.1805 +{ \ 1.1806 + int x, y; \ 1.1807 + for (y = 0; y < h; y++) \ 1.1808 + { \ 1.1809 + const pix_type *s = src + (h - y - 1); \ 1.1810 + pix_type *d = dst + dst_stride * y; \ 1.1811 + for (x = 0; x < w; x++) \ 1.1812 + { \ 1.1813 + *d++ = *s; \ 1.1814 + s += src_stride; \ 1.1815 + } \ 1.1816 + } \ 1.1817 +} \ 1.1818 + \ 1.1819 +static void \ 1.1820 +blt_rotated_270_trivial_##suffix (pix_type *dst, \ 1.1821 + int dst_stride, \ 1.1822 + const pix_type *src, \ 1.1823 + int src_stride, \ 1.1824 + int w, \ 1.1825 + int h) \ 1.1826 +{ \ 1.1827 + int x, y; \ 1.1828 + for (y = 0; y < h; y++) \ 1.1829 + { \ 1.1830 + const pix_type *s = src + src_stride * (w - 1) + y; \ 1.1831 + pix_type *d = dst + dst_stride * y; \ 1.1832 + for (x = 0; x < w; x++) \ 1.1833 + { \ 1.1834 + *d++ = *s; \ 1.1835 + s -= src_stride; \ 1.1836 + } \ 1.1837 + } \ 1.1838 +} \ 1.1839 + \ 1.1840 +static void \ 1.1841 +blt_rotated_90_##suffix (pix_type *dst, \ 1.1842 + int dst_stride, \ 1.1843 + const pix_type *src, \ 1.1844 + int src_stride, \ 1.1845 + int W, \ 1.1846 + int H) \ 1.1847 +{ \ 1.1848 + int x; \ 1.1849 + int leading_pixels = 0, trailing_pixels = 0; \ 1.1850 + const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \ 1.1851 + \ 1.1852 + /* \ 1.1853 + * split processing into handling destination as TILE_SIZExH cache line \ 1.1854 + * aligned vertical stripes (optimistically assuming that destination \ 1.1855 + * stride is a multiple of cache line, if not - it will be just a bit \ 1.1856 + * slower) \ 1.1857 + */ \ 1.1858 + \ 1.1859 + if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \ 1.1860 + { \ 1.1861 + leading_pixels = TILE_SIZE - (((uintptr_t)dst & \ 1.1862 + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ 1.1863 + if (leading_pixels > W) \ 1.1864 + leading_pixels = W; \ 1.1865 + \ 1.1866 + /* unaligned leading part NxH (where N < TILE_SIZE) */ \ 1.1867 + blt_rotated_90_trivial_##suffix ( \ 1.1868 + dst, \ 1.1869 + dst_stride, \ 1.1870 + src, \ 1.1871 + src_stride, \ 1.1872 + leading_pixels, \ 1.1873 + H); \ 1.1874 + \ 1.1875 + dst += leading_pixels; \ 1.1876 + src += leading_pixels * src_stride; \ 1.1877 + W -= leading_pixels; \ 1.1878 + } \ 1.1879 + \ 1.1880 + if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \ 1.1881 + { \ 1.1882 + trailing_pixels = (((uintptr_t)(dst + W) & \ 1.1883 + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ 1.1884 + if (trailing_pixels > W) \ 1.1885 + 
trailing_pixels = W; \ 1.1886 + W -= trailing_pixels; \ 1.1887 + } \ 1.1888 + \ 1.1889 + for (x = 0; x < W; x += TILE_SIZE) \ 1.1890 + { \ 1.1891 + /* aligned middle part TILE_SIZExH */ \ 1.1892 + blt_rotated_90_trivial_##suffix ( \ 1.1893 + dst + x, \ 1.1894 + dst_stride, \ 1.1895 + src + src_stride * x, \ 1.1896 + src_stride, \ 1.1897 + TILE_SIZE, \ 1.1898 + H); \ 1.1899 + } \ 1.1900 + \ 1.1901 + if (trailing_pixels) \ 1.1902 + { \ 1.1903 + /* unaligned trailing part NxH (where N < TILE_SIZE) */ \ 1.1904 + blt_rotated_90_trivial_##suffix ( \ 1.1905 + dst + W, \ 1.1906 + dst_stride, \ 1.1907 + src + W * src_stride, \ 1.1908 + src_stride, \ 1.1909 + trailing_pixels, \ 1.1910 + H); \ 1.1911 + } \ 1.1912 +} \ 1.1913 + \ 1.1914 +static void \ 1.1915 +blt_rotated_270_##suffix (pix_type *dst, \ 1.1916 + int dst_stride, \ 1.1917 + const pix_type *src, \ 1.1918 + int src_stride, \ 1.1919 + int W, \ 1.1920 + int H) \ 1.1921 +{ \ 1.1922 + int x; \ 1.1923 + int leading_pixels = 0, trailing_pixels = 0; \ 1.1924 + const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \ 1.1925 + \ 1.1926 + /* \ 1.1927 + * split processing into handling destination as TILE_SIZExH cache line \ 1.1928 + * aligned vertical stripes (optimistically assuming that destination \ 1.1929 + * stride is a multiple of cache line, if not - it will be just a bit \ 1.1930 + * slower) \ 1.1931 + */ \ 1.1932 + \ 1.1933 + if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \ 1.1934 + { \ 1.1935 + leading_pixels = TILE_SIZE - (((uintptr_t)dst & \ 1.1936 + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ 1.1937 + if (leading_pixels > W) \ 1.1938 + leading_pixels = W; \ 1.1939 + \ 1.1940 + /* unaligned leading part NxH (where N < TILE_SIZE) */ \ 1.1941 + blt_rotated_270_trivial_##suffix ( \ 1.1942 + dst, \ 1.1943 + dst_stride, \ 1.1944 + src + src_stride * (W - leading_pixels), \ 1.1945 + src_stride, \ 1.1946 + leading_pixels, \ 1.1947 + H); \ 1.1948 + \ 1.1949 + dst += leading_pixels; \ 1.1950 + W -= leading_pixels; \ 1.1951 + } \ 1.1952 + \ 1.1953 + if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \ 1.1954 + { \ 1.1955 + trailing_pixels = (((uintptr_t)(dst + W) & \ 1.1956 + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ 1.1957 + if (trailing_pixels > W) \ 1.1958 + trailing_pixels = W; \ 1.1959 + W -= trailing_pixels; \ 1.1960 + src += trailing_pixels * src_stride; \ 1.1961 + } \ 1.1962 + \ 1.1963 + for (x = 0; x < W; x += TILE_SIZE) \ 1.1964 + { \ 1.1965 + /* aligned middle part TILE_SIZExH */ \ 1.1966 + blt_rotated_270_trivial_##suffix ( \ 1.1967 + dst + x, \ 1.1968 + dst_stride, \ 1.1969 + src + src_stride * (W - x - TILE_SIZE), \ 1.1970 + src_stride, \ 1.1971 + TILE_SIZE, \ 1.1972 + H); \ 1.1973 + } \ 1.1974 + \ 1.1975 + if (trailing_pixels) \ 1.1976 + { \ 1.1977 + /* unaligned trailing part NxH (where N < TILE_SIZE) */ \ 1.1978 + blt_rotated_270_trivial_##suffix ( \ 1.1979 + dst + W, \ 1.1980 + dst_stride, \ 1.1981 + src - trailing_pixels * src_stride, \ 1.1982 + src_stride, \ 1.1983 + trailing_pixels, \ 1.1984 + H); \ 1.1985 + } \ 1.1986 +} \ 1.1987 + \ 1.1988 +static void \ 1.1989 +fast_composite_rotate_90_##suffix (pixman_implementation_t *imp, \ 1.1990 + pixman_composite_info_t *info) \ 1.1991 +{ \ 1.1992 + PIXMAN_COMPOSITE_ARGS (info); \ 1.1993 + pix_type *dst_line; \ 1.1994 + pix_type *src_line; \ 1.1995 + int dst_stride, src_stride; \ 1.1996 + int src_x_t, src_y_t; \ 1.1997 + \ 1.1998 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type, \ 1.1999 + dst_stride, dst_line, 1); \ 1.2000 + src_x_t = -src_y + pixman_fixed_to_int ( \ 
1.2001 + src_image->common.transform->matrix[0][2] + \ 1.2002 + pixman_fixed_1 / 2 - pixman_fixed_e) - height;\ 1.2003 + src_y_t = src_x + pixman_fixed_to_int ( \ 1.2004 + src_image->common.transform->matrix[1][2] + \ 1.2005 + pixman_fixed_1 / 2 - pixman_fixed_e); \ 1.2006 + PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \ 1.2007 + src_stride, src_line, 1); \ 1.2008 + blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride, \ 1.2009 + width, height); \ 1.2010 +} \ 1.2011 + \ 1.2012 +static void \ 1.2013 +fast_composite_rotate_270_##suffix (pixman_implementation_t *imp, \ 1.2014 + pixman_composite_info_t *info) \ 1.2015 +{ \ 1.2016 + PIXMAN_COMPOSITE_ARGS (info); \ 1.2017 + pix_type *dst_line; \ 1.2018 + pix_type *src_line; \ 1.2019 + int dst_stride, src_stride; \ 1.2020 + int src_x_t, src_y_t; \ 1.2021 + \ 1.2022 + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type, \ 1.2023 + dst_stride, dst_line, 1); \ 1.2024 + src_x_t = src_y + pixman_fixed_to_int ( \ 1.2025 + src_image->common.transform->matrix[0][2] + \ 1.2026 + pixman_fixed_1 / 2 - pixman_fixed_e); \ 1.2027 + src_y_t = -src_x + pixman_fixed_to_int ( \ 1.2028 + src_image->common.transform->matrix[1][2] + \ 1.2029 + pixman_fixed_1 / 2 - pixman_fixed_e) - width; \ 1.2030 + PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \ 1.2031 + src_stride, src_line, 1); \ 1.2032 + blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride, \ 1.2033 + width, height); \ 1.2034 +} 1.2035 + 1.2036 +FAST_SIMPLE_ROTATE (8, uint8_t) 1.2037 +FAST_SIMPLE_ROTATE (565, uint16_t) 1.2038 +FAST_SIMPLE_ROTATE (8888, uint32_t) 1.2039 + 1.2040 +static const pixman_fast_path_t c_fast_paths[] = 1.2041 +{ 1.2042 + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565), 1.2043 + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565), 1.2044 + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888), 1.2045 + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888), 1.2046 + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888), 1.2047 + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888), 1.2048 + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888), 1.2049 + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888), 1.2050 + PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888), 1.2051 + PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888), 1.2052 + PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888), 1.2053 + PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888), 1.2054 + PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, fast_composite_over_n_1_0565), 1.2055 + PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, fast_composite_over_n_1_0565), 1.2056 + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca), 1.2057 + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca), 1.2058 + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca), 1.2059 + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca), 1.2060 + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca), 1.2061 + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, 
fast_composite_over_n_8888_0565_ca), 1.2062 + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888), 1.2063 + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888), 1.2064 + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888), 1.2065 + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888), 1.2066 + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888), 1.2067 + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888), 1.2068 + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565), 1.2069 + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888), 1.2070 + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888), 1.2071 + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565), 1.2072 + PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, fast_composite_add_0565_0565), 1.2073 + PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, fast_composite_add_0565_0565), 1.2074 + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888), 1.2075 + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888), 1.2076 + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8), 1.2077 + PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1_1), 1.2078 + PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca), 1.2079 + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8), 1.2080 + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill), 1.2081 + PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill), 1.2082 + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill), 1.2083 + PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill), 1.2084 + PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill), 1.2085 + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill), 1.2086 + PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill), 1.2087 + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888), 1.2088 + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888), 1.2089 + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy), 1.2090 + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy), 1.2091 + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy), 1.2092 + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy), 1.2093 + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy), 1.2094 + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy), 1.2095 + PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy), 1.2096 + PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy), 1.2097 + PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy), 1.2098 + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy), 1.2099 + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy), 1.2100 + PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy), 1.2101 + 
PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy), 1.2102 + PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), 1.2103 + PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), 1.2104 + PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy), 1.2105 + PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8), 1.2106 + PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8), 1.2107 + 1.2108 + SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888), 1.2109 + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888), 1.2110 + SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888), 1.2111 + SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888), 1.2112 + 1.2113 + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888), 1.2114 + SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888), 1.2115 + 1.2116 + SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565), 1.2117 + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565), 1.2118 + 1.2119 + SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565), 1.2120 + 1.2121 + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888), 1.2122 + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888), 1.2123 + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888), 1.2124 + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888), 1.2125 + SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888), 1.2126 + SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888), 1.2127 + 1.2128 + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888), 1.2129 + SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888), 1.2130 + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888), 1.2131 + SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888), 1.2132 + 1.2133 + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565), 1.2134 + 1.2135 +#define NEAREST_FAST_PATH(op,s,d) \ 1.2136 + { PIXMAN_OP_ ## op, \ 1.2137 + PIXMAN_ ## s, SCALED_NEAREST_FLAGS, \ 1.2138 + PIXMAN_null, 0, \ 1.2139 + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 1.2140 + fast_composite_scaled_nearest, \ 1.2141 + } 1.2142 + 1.2143 + NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8), 1.2144 + NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8), 1.2145 + NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8), 1.2146 + NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8), 1.2147 + 1.2148 + NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8), 1.2149 + NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8), 1.2150 + NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8), 1.2151 + NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8), 1.2152 + 1.2153 + NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8), 1.2154 + NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8), 1.2155 + NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8), 1.2156 + NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8), 1.2157 + 1.2158 + NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8), 1.2159 + NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8), 1.2160 + NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8), 1.2161 + NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8), 1.2162 + 1.2163 +#define SIMPLE_ROTATE_FLAGS(angle) \ 1.2164 + (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM | \ 1.2165 + FAST_PATH_NEAREST_FILTER | \ 1.2166 + FAST_PATH_SAMPLES_COVER_CLIP_NEAREST | \ 1.2167 + FAST_PATH_STANDARD_FLAGS) 1.2168 + 1.2169 +#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix) \ 1.2170 + { PIXMAN_OP_ ## op, \ 1.2171 + PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS 
(90), \ 1.2172 + PIXMAN_null, 0, \ 1.2173 + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 1.2174 + fast_composite_rotate_90_##suffix, \ 1.2175 + }, \ 1.2176 + { PIXMAN_OP_ ## op, \ 1.2177 + PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270), \ 1.2178 + PIXMAN_null, 0, \ 1.2179 + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 1.2180 + fast_composite_rotate_270_##suffix, \ 1.2181 + } 1.2182 + 1.2183 + SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888), 1.2184 + SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888), 1.2185 + SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888), 1.2186 + SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565), 1.2187 + SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8), 1.2188 + 1.2189 + /* Simple repeat fast path entry. */ 1.2190 + { PIXMAN_OP_any, 1.2191 + PIXMAN_any, 1.2192 + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE | 1.2193 + FAST_PATH_NORMAL_REPEAT), 1.2194 + PIXMAN_any, 0, 1.2195 + PIXMAN_any, FAST_PATH_STD_DEST_FLAGS, 1.2196 + fast_composite_tiled_repeat 1.2197 + }, 1.2198 + 1.2199 + SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565), 1.2200 + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565), 1.2201 + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888), 1.2202 + 1.2203 + { PIXMAN_OP_NONE }, 1.2204 +}; 1.2205 + 1.2206 +#ifdef WORDS_BIGENDIAN 1.2207 +#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n))) 1.2208 +#else 1.2209 +#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs)) 1.2210 +#endif 1.2211 + 1.2212 +static force_inline void 1.2213 +pixman_fill1_line (uint32_t *dst, int offs, int width, int v) 1.2214 +{ 1.2215 + if (offs) 1.2216 + { 1.2217 + int leading_pixels = 32 - offs; 1.2218 + if (leading_pixels >= width) 1.2219 + { 1.2220 + if (v) 1.2221 + *dst |= A1_FILL_MASK (width, offs); 1.2222 + else 1.2223 + *dst &= ~A1_FILL_MASK (width, offs); 1.2224 + return; 1.2225 + } 1.2226 + else 1.2227 + { 1.2228 + if (v) 1.2229 + *dst++ |= A1_FILL_MASK (leading_pixels, offs); 1.2230 + else 1.2231 + *dst++ &= ~A1_FILL_MASK (leading_pixels, offs); 1.2232 + width -= leading_pixels; 1.2233 + } 1.2234 + } 1.2235 + while (width >= 32) 1.2236 + { 1.2237 + if (v) 1.2238 + *dst++ = 0xFFFFFFFF; 1.2239 + else 1.2240 + *dst++ = 0; 1.2241 + width -= 32; 1.2242 + } 1.2243 + if (width > 0) 1.2244 + { 1.2245 + if (v) 1.2246 + *dst |= A1_FILL_MASK (width, 0); 1.2247 + else 1.2248 + *dst &= ~A1_FILL_MASK (width, 0); 1.2249 + } 1.2250 +} 1.2251 + 1.2252 +static void 1.2253 +pixman_fill1 (uint32_t *bits, 1.2254 + int stride, 1.2255 + int x, 1.2256 + int y, 1.2257 + int width, 1.2258 + int height, 1.2259 + uint32_t filler) 1.2260 +{ 1.2261 + uint32_t *dst = bits + y * stride + (x >> 5); 1.2262 + int offs = x & 31; 1.2263 + 1.2264 + if (filler & 1) 1.2265 + { 1.2266 + while (height--) 1.2267 + { 1.2268 + pixman_fill1_line (dst, offs, width, 1); 1.2269 + dst += stride; 1.2270 + } 1.2271 + } 1.2272 + else 1.2273 + { 1.2274 + while (height--) 1.2275 + { 1.2276 + pixman_fill1_line (dst, offs, width, 0); 1.2277 + dst += stride; 1.2278 + } 1.2279 + } 1.2280 +} 1.2281 + 1.2282 +static void 1.2283 +pixman_fill8 (uint32_t *bits, 1.2284 + int stride, 1.2285 + int x, 1.2286 + int y, 1.2287 + int width, 1.2288 + int height, 1.2289 + uint32_t filler) 1.2290 +{ 1.2291 + int byte_stride = stride * (int) sizeof (uint32_t); 1.2292 + uint8_t *dst = (uint8_t *) bits; 1.2293 + uint8_t v = filler & 0xff; 1.2294 + int i; 1.2295 + 1.2296 + dst = dst + y * byte_stride + x; 1.2297 + 1.2298 + while (height--) 1.2299 + { 1.2300 + for (i = 0; i < 
width; ++i) 1.2301 + dst[i] = v; 1.2302 + 1.2303 + dst += byte_stride; 1.2304 + } 1.2305 +} 1.2306 + 1.2307 +static void 1.2308 +pixman_fill16 (uint32_t *bits, 1.2309 + int stride, 1.2310 + int x, 1.2311 + int y, 1.2312 + int width, 1.2313 + int height, 1.2314 + uint32_t filler) 1.2315 +{ 1.2316 + int short_stride = 1.2317 + (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t); 1.2318 + uint16_t *dst = (uint16_t *)bits; 1.2319 + uint16_t v = filler & 0xffff; 1.2320 + int i; 1.2321 + 1.2322 + dst = dst + y * short_stride + x; 1.2323 + 1.2324 + while (height--) 1.2325 + { 1.2326 + for (i = 0; i < width; ++i) 1.2327 + dst[i] = v; 1.2328 + 1.2329 + dst += short_stride; 1.2330 + } 1.2331 +} 1.2332 + 1.2333 +static void 1.2334 +pixman_fill32 (uint32_t *bits, 1.2335 + int stride, 1.2336 + int x, 1.2337 + int y, 1.2338 + int width, 1.2339 + int height, 1.2340 + uint32_t filler) 1.2341 +{ 1.2342 + int i; 1.2343 + 1.2344 + bits = bits + y * stride + x; 1.2345 + 1.2346 + while (height--) 1.2347 + { 1.2348 + for (i = 0; i < width; ++i) 1.2349 + bits[i] = filler; 1.2350 + 1.2351 + bits += stride; 1.2352 + } 1.2353 +} 1.2354 + 1.2355 +static pixman_bool_t 1.2356 +fast_path_fill (pixman_implementation_t *imp, 1.2357 + uint32_t * bits, 1.2358 + int stride, 1.2359 + int bpp, 1.2360 + int x, 1.2361 + int y, 1.2362 + int width, 1.2363 + int height, 1.2364 + uint32_t filler) 1.2365 +{ 1.2366 + switch (bpp) 1.2367 + { 1.2368 + case 1: 1.2369 + pixman_fill1 (bits, stride, x, y, width, height, filler); 1.2370 + break; 1.2371 + 1.2372 + case 8: 1.2373 + pixman_fill8 (bits, stride, x, y, width, height, filler); 1.2374 + break; 1.2375 + 1.2376 + case 16: 1.2377 + pixman_fill16 (bits, stride, x, y, width, height, filler); 1.2378 + break; 1.2379 + 1.2380 + case 32: 1.2381 + pixman_fill32 (bits, stride, x, y, width, height, filler); 1.2382 + break; 1.2383 + 1.2384 + default: 1.2385 + return FALSE; 1.2386 + } 1.2387 + 1.2388 + return TRUE; 1.2389 +} 1.2390 + 1.2391 +/*****************************************************************************/ 1.2392 + 1.2393 +static uint32_t * 1.2394 +fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) 1.2395 +{ 1.2396 + int32_t w = iter->width; 1.2397 + uint32_t *dst = iter->buffer; 1.2398 + const uint16_t *src = (const uint16_t *)iter->bits; 1.2399 + 1.2400 + iter->bits += iter->stride; 1.2401 + 1.2402 + /* Align the source buffer at 4 bytes boundary */ 1.2403 + if (w > 0 && ((uintptr_t)src & 3)) 1.2404 + { 1.2405 + *dst++ = convert_0565_to_8888 (*src++); 1.2406 + w--; 1.2407 + } 1.2408 + /* Process two pixels per iteration */ 1.2409 + while ((w -= 2) >= 0) 1.2410 + { 1.2411 + uint32_t sr, sb, sg, t0, t1; 1.2412 + uint32_t s = *(const uint32_t *)src; 1.2413 + src += 2; 1.2414 + sr = (s >> 8) & 0x00F800F8; 1.2415 + sb = (s << 3) & 0x00F800F8; 1.2416 + sg = (s >> 3) & 0x00FC00FC; 1.2417 + sr |= sr >> 5; 1.2418 + sb |= sb >> 5; 1.2419 + sg |= sg >> 6; 1.2420 + t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) | 1.2421 + (sb & 0xFF) | 0xFF000000; 1.2422 + t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) | 1.2423 + (sb >> 16) | 0xFF000000; 1.2424 +#ifdef WORDS_BIGENDIAN 1.2425 + *dst++ = t1; 1.2426 + *dst++ = t0; 1.2427 +#else 1.2428 + *dst++ = t0; 1.2429 + *dst++ = t1; 1.2430 +#endif 1.2431 + } 1.2432 + if (w & 1) 1.2433 + { 1.2434 + *dst = convert_0565_to_8888 (*src); 1.2435 + } 1.2436 + 1.2437 + return iter->buffer; 1.2438 +} 1.2439 + 1.2440 +static uint32_t * 1.2441 +fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask) 1.2442 +{ 1.2443 + 
iter->bits += iter->stride; 1.2444 + return iter->buffer; 1.2445 +} 1.2446 + 1.2447 +/* Helper function for a workaround, which tries to ensure that 0x1F001F 1.2448 + * constant is always allocated in a register on RISC architectures. 1.2449 + */ 1.2450 +static force_inline uint32_t 1.2451 +convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F) 1.2452 +{ 1.2453 + uint32_t a, b; 1.2454 + a = (s >> 3) & x1F001F; 1.2455 + b = s & 0xFC00; 1.2456 + a |= a >> 5; 1.2457 + a |= b >> 5; 1.2458 + return a; 1.2459 +} 1.2460 + 1.2461 +static void 1.2462 +fast_write_back_r5g6b5 (pixman_iter_t *iter) 1.2463 +{ 1.2464 + int32_t w = iter->width; 1.2465 + uint16_t *dst = (uint16_t *)(iter->bits - iter->stride); 1.2466 + const uint32_t *src = iter->buffer; 1.2467 + /* Workaround to ensure that x1F001F variable is allocated in a register */ 1.2468 + static volatile uint32_t volatile_x1F001F = 0x1F001F; 1.2469 + uint32_t x1F001F = volatile_x1F001F; 1.2470 + 1.2471 + while ((w -= 4) >= 0) 1.2472 + { 1.2473 + uint32_t s1 = *src++; 1.2474 + uint32_t s2 = *src++; 1.2475 + uint32_t s3 = *src++; 1.2476 + uint32_t s4 = *src++; 1.2477 + *dst++ = convert_8888_to_0565_workaround (s1, x1F001F); 1.2478 + *dst++ = convert_8888_to_0565_workaround (s2, x1F001F); 1.2479 + *dst++ = convert_8888_to_0565_workaround (s3, x1F001F); 1.2480 + *dst++ = convert_8888_to_0565_workaround (s4, x1F001F); 1.2481 + } 1.2482 + if (w & 2) 1.2483 + { 1.2484 + *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F); 1.2485 + *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F); 1.2486 + } 1.2487 + if (w & 1) 1.2488 + { 1.2489 + *dst = convert_8888_to_0565_workaround (*src, x1F001F); 1.2490 + } 1.2491 +} 1.2492 + 1.2493 +typedef struct 1.2494 +{ 1.2495 + pixman_format_code_t format; 1.2496 + pixman_iter_get_scanline_t get_scanline; 1.2497 + pixman_iter_write_back_t write_back; 1.2498 +} fetcher_info_t; 1.2499 + 1.2500 +static const fetcher_info_t fetchers[] = 1.2501 +{ 1.2502 + { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, 1.2503 + { PIXMAN_null } 1.2504 +}; 1.2505 + 1.2506 +static pixman_bool_t 1.2507 +fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) 1.2508 +{ 1.2509 + pixman_image_t *image = iter->image; 1.2510 + 1.2511 +#define FLAGS \ 1.2512 + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ 1.2513 + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) 1.2514 + 1.2515 + if (iter->iter_flags & ITER_16) 1.2516 + return FALSE; 1.2517 + 1.2518 + if ((iter->iter_flags & ITER_NARROW) && 1.2519 + (iter->image_flags & FLAGS) == FLAGS) 1.2520 + { 1.2521 + const fetcher_info_t *f; 1.2522 + 1.2523 + for (f = &fetchers[0]; f->format != PIXMAN_null; f++) 1.2524 + { 1.2525 + if (image->common.extended_format_code == f->format) 1.2526 + { 1.2527 + uint8_t *b = (uint8_t *)image->bits.bits; 1.2528 + int s = image->bits.rowstride * 4; 1.2529 + 1.2530 + iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; 1.2531 + iter->stride = s; 1.2532 + 1.2533 + iter->get_scanline = f->get_scanline; 1.2534 + return TRUE; 1.2535 + } 1.2536 + } 1.2537 + } 1.2538 + 1.2539 + return FALSE; 1.2540 +} 1.2541 + 1.2542 +static pixman_bool_t 1.2543 +fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) 1.2544 +{ 1.2545 + pixman_image_t *image = iter->image; 1.2546 + 1.2547 + if (iter->iter_flags & ITER_16) 1.2548 + return FALSE; 1.2549 + 1.2550 + if ((iter->iter_flags & ITER_NARROW) && 1.2551 + (iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS) 
1.2552 + { 1.2553 + const fetcher_info_t *f; 1.2554 + 1.2555 + for (f = &fetchers[0]; f->format != PIXMAN_null; f++) 1.2556 + { 1.2557 + if (image->common.extended_format_code == f->format) 1.2558 + { 1.2559 + uint8_t *b = (uint8_t *)image->bits.bits; 1.2560 + int s = image->bits.rowstride * 4; 1.2561 + 1.2562 + iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8; 1.2563 + iter->stride = s; 1.2564 + 1.2565 + if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == 1.2566 + (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) 1.2567 + { 1.2568 + iter->get_scanline = fast_dest_fetch_noop; 1.2569 + } 1.2570 + else 1.2571 + { 1.2572 + iter->get_scanline = f->get_scanline; 1.2573 + } 1.2574 + iter->write_back = f->write_back; 1.2575 + return TRUE; 1.2576 + } 1.2577 + } 1.2578 + } 1.2579 + return FALSE; 1.2580 +} 1.2581 + 1.2582 + 1.2583 +pixman_implementation_t * 1.2584 +_pixman_implementation_create_fast_path (pixman_implementation_t *fallback) 1.2585 +{ 1.2586 + pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths); 1.2587 + 1.2588 + imp->fill = fast_path_fill; 1.2589 + imp->src_iter_init = fast_src_iter_init; 1.2590 + imp->dest_iter_init = fast_dest_iter_init; 1.2591 + 1.2592 + return imp; 1.2593 +}