--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gfx/cairo/libpixman/src/pixman-combine32.h	Wed Dec 31 06:09:35 2014 +0100
@@ -0,0 +1,272 @@
#define COMPONENT_SIZE 8
#define MASK 0xff
#define ONE_HALF 0x80

#define A_SHIFT 8 * 3
#define R_SHIFT 8 * 2
#define G_SHIFT 8
#define A_MASK 0xff000000
#define R_MASK 0xff0000
#define G_MASK 0xff00

#define RB_MASK 0xff00ff
#define AG_MASK 0xff00ff00
#define RB_ONE_HALF 0x800080
#define RB_MASK_PLUS_ONE 0x10000100

#define ALPHA_8(x) ((x) >> A_SHIFT)
#define RED_8(x) (((x) >> R_SHIFT) & MASK)
#define GREEN_8(x) (((x) >> G_SHIFT) & MASK)
#define BLUE_8(x) ((x) & MASK)

/*
 * ARMv6 has the UQADD8 instruction, which implements unsigned saturated
 * addition for 8-bit values packed in 32-bit registers.  It is very useful
 * for the UN8x4_ADD_UN8x4, UN8_rb_ADD_UN8_rb and ADD_UN8 macros (which would
 * otherwise need a lot of arithmetic operations to simulate saturation).
 * Since most of the major ARM Linux distros are built for ARMv7, we are
 * much less dependent on runtime CPU detection and can get practical
 * benefits from conditional compilation here for a lot of users.
 */

#if defined(USE_GCC_INLINE_ASM) && defined(__arm__) && \
    !defined(__aarch64__) && (!defined(__thumb__) || defined(__thumb2__))
#if defined(__ARM_ARCH_6__)   || defined(__ARM_ARCH_6J__)  || \
    defined(__ARM_ARCH_6K__)  || defined(__ARM_ARCH_6Z__)  || \
    defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \
    defined(__ARM_ARCH_6M__)  || defined(__ARM_ARCH_7__)   || \
    defined(__ARM_ARCH_7A__)  || defined(__ARM_ARCH_7R__)  || \
    defined(__ARM_ARCH_7M__)  || defined(__ARM_ARCH_7EM__)

static force_inline uint32_t
un8x4_add_un8x4 (uint32_t x, uint32_t y)
{
    uint32_t t;
    asm ("uqadd8 %0, %1, %2" : "=r" (t) : "%r" (x), "r" (y));
    return t;
}

#define UN8x4_ADD_UN8x4(x, y) \
    ((x) = un8x4_add_un8x4 ((x), (y)))

#define UN8_rb_ADD_UN8_rb(x, y, t) \
    ((t) = un8x4_add_un8x4 ((x), (y)), (x) = (t))

#define ADD_UN8(x, y, t) \
    ((t) = (x), un8x4_add_un8x4 ((t), (y)))

#endif
#endif

/*****************************************************************************/

/*
 * Helper macros.
 */

#define MUL_UN8(a, b, t) \
    ((t) = (a) * (uint16_t)(b) + ONE_HALF, ((((t) >> G_SHIFT) + (t)) >> G_SHIFT))

#define DIV_UN8(a, b) \
    (((uint16_t) (a) * MASK + ((b) / 2)) / (b))

#ifndef ADD_UN8
#define ADD_UN8(x, y, t) \
    ((t) = (x) + (y), \
     (uint32_t) (uint8_t) ((t) | (0 - ((t) >> G_SHIFT))))
#endif

#define DIV_ONE_UN8(x) \
    (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT)
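/*
 * The ONE_HALF additions above implement exact rounded division by 255:
 * for any product 0 <= d <= 255 * 255,
 * (d + 128 + ((d + 128) >> 8)) >> 8 == floor (d / 255.0 + 0.5).
 * A minimal sanity-check sketch (illustrative only, not part of this
 * header; assumes <assert.h> is available):
 */
static void
check_mul_un8 (void)
{
    unsigned a, b;

    for (a = 0; a <= 255; a++)
    {
	for (b = 0; b <= 255; b++)
	{
	    uint32_t t;

	    /* (2ab + 255) / 510 is the rounded quotient a*b / 255,
	     * computed without the shift trick. */
	    assert (MUL_UN8 (a, b, t) == (2 * a * b + 255) / 510);
	}
    }
}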
/*
 * The methods below use some tricks to be able to do two color
 * components at the same time.
 */

/*
 * x_rb = (x_rb * a) / 255
 */
#define UN8_rb_MUL_UN8(x, a, t)                               \
    do                                                        \
    {                                                         \
	t = ((x) & RB_MASK) * (a);                            \
	t += RB_ONE_HALF;                                     \
	x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;      \
	x &= RB_MASK;                                         \
    } while (0)

/*
 * x_rb = min (x_rb + y_rb, 255)
 */
#ifndef UN8_rb_ADD_UN8_rb
#define UN8_rb_ADD_UN8_rb(x, y, t)                            \
    do                                                        \
    {                                                         \
	t = ((x) + (y));                                      \
	t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);   \
	x = (t & RB_MASK);                                    \
    } while (0)
#endif

/*
 * x_rb = (x_rb * a_rb) / 255
 */
#define UN8_rb_MUL_UN8_rb(x, a, t)                            \
    do                                                        \
    {                                                         \
	t = (x & MASK) * (a & MASK);                          \
	t |= (x & R_MASK) * ((a >> R_SHIFT) & MASK);          \
	t += RB_ONE_HALF;                                     \
	t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;      \
	x = t & RB_MASK;                                      \
    } while (0)
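/*
 * Worked illustration of the two-components-at-a-time trick above
 * (illustrative only, not part of this header): red and blue travel
 * together as 0x00rr00bb, so each 8x8-bit product lands in its own
 * 16-bit lane and the rounding step is applied to both lanes at once.
 * A minimal sketch comparing the packed path against the scalar
 * MUL_UN8 helper (assumes <assert.h> is available):
 */
static void
check_un8_rb_mul_un8 (void)
{
    uint32_t x = 0x00800040;	/* r = 0x80, b = 0x40, packed as 0x00rr00bb */
    uint32_t a = 0x80;
    uint32_t t, s;
    uint32_t r, b;

    UN8_rb_MUL_UN8 (x, a, t);	/* both channels scaled in one pass */

    r = MUL_UN8 (0x80, a, s);	/* per-channel reference results */
    b = MUL_UN8 (0x40, a, s);

    assert (x == ((r << R_SHIFT) | b));	/* 0x00400020 */
}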
/*
 * x_c = (x_c * a) / 255
 */
#define UN8x4_MUL_UN8(x, a)                                   \
    do                                                        \
    {                                                         \
	uint32_t r1__, r2__, t__;                             \
                                                              \
	r1__ = (x);                                           \
	UN8_rb_MUL_UN8 (r1__, (a), t__);                      \
                                                              \
	r2__ = (x) >> G_SHIFT;                                \
	UN8_rb_MUL_UN8 (r2__, (a), t__);                      \
                                                              \
	(x) = r1__ | (r2__ << G_SHIFT);                       \
    } while (0)

/*
 * x_c = (x_c * a) / 255 + y_c
 */
#define UN8x4_MUL_UN8_ADD_UN8x4(x, a, y)                      \
    do                                                        \
    {                                                         \
	uint32_t r1__, r2__, r3__, t__;                       \
                                                              \
	r1__ = (x);                                           \
	r2__ = (y) & RB_MASK;                                 \
	UN8_rb_MUL_UN8 (r1__, (a), t__);                      \
	UN8_rb_ADD_UN8_rb (r1__, r2__, t__);                  \
                                                              \
	r2__ = (x) >> G_SHIFT;                                \
	r3__ = ((y) >> G_SHIFT) & RB_MASK;                    \
	UN8_rb_MUL_UN8 (r2__, (a), t__);                      \
	UN8_rb_ADD_UN8_rb (r2__, r3__, t__);                  \
                                                              \
	(x) = r1__ | (r2__ << G_SHIFT);                       \
    } while (0)

/*
 * x_c = (x_c * a + y_c * b) / 255
 */
#define UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8(x, a, y, b)           \
    do                                                        \
    {                                                         \
	uint32_t r1__, r2__, r3__, t__;                       \
                                                              \
	r1__ = (x);                                           \
	r2__ = (y);                                           \
	UN8_rb_MUL_UN8 (r1__, (a), t__);                      \
	UN8_rb_MUL_UN8 (r2__, (b), t__);                      \
	UN8_rb_ADD_UN8_rb (r1__, r2__, t__);                  \
                                                              \
	r2__ = ((x) >> G_SHIFT);                              \
	r3__ = ((y) >> G_SHIFT);                              \
	UN8_rb_MUL_UN8 (r2__, (a), t__);                      \
	UN8_rb_MUL_UN8 (r3__, (b), t__);                      \
	UN8_rb_ADD_UN8_rb (r2__, r3__, t__);                  \
                                                              \
	(x) = r1__ | (r2__ << G_SHIFT);                       \
    } while (0)

/*
 * x_c = (x_c * a_c) / 255
 */
#define UN8x4_MUL_UN8x4(x, a)                                 \
    do                                                        \
    {                                                         \
	uint32_t r1__, r2__, r3__, t__;                       \
                                                              \
	r1__ = (x);                                           \
	r2__ = (a);                                           \
	UN8_rb_MUL_UN8_rb (r1__, r2__, t__);                  \
                                                              \
	r2__ = (x) >> G_SHIFT;                                \
	r3__ = (a) >> G_SHIFT;                                \
	UN8_rb_MUL_UN8_rb (r2__, r3__, t__);                  \
                                                              \
	(x) = r1__ | (r2__ << G_SHIFT);                       \
    } while (0)

/*
 * x_c = (x_c * a_c) / 255 + y_c
 */
#define UN8x4_MUL_UN8x4_ADD_UN8x4(x, a, y)                    \
    do                                                        \
    {                                                         \
	uint32_t r1__, r2__, r3__, t__;                       \
                                                              \
	r1__ = (x);                                           \
	r2__ = (a);                                           \
	UN8_rb_MUL_UN8_rb (r1__, r2__, t__);                  \
	r2__ = (y) & RB_MASK;                                 \
	UN8_rb_ADD_UN8_rb (r1__, r2__, t__);                  \
                                                              \
	r2__ = ((x) >> G_SHIFT);                              \
	r3__ = ((a) >> G_SHIFT);                              \
	UN8_rb_MUL_UN8_rb (r2__, r3__, t__);                  \
	r3__ = ((y) >> G_SHIFT) & RB_MASK;                    \
	UN8_rb_ADD_UN8_rb (r2__, r3__, t__);                  \
                                                              \
	(x) = r1__ | (r2__ << G_SHIFT);                       \
    } while (0)

/*
 * x_c = (x_c * a_c + y_c * b) / 255
 */
#define UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8(x, a, y, b)         \
    do                                                        \
    {                                                         \
	uint32_t r1__, r2__, r3__, t__;                       \
                                                              \
	r1__ = (x);                                           \
	r2__ = (a);                                           \
	UN8_rb_MUL_UN8_rb (r1__, r2__, t__);                  \
	r2__ = (y);                                           \
	UN8_rb_MUL_UN8 (r2__, (b), t__);                      \
	UN8_rb_ADD_UN8_rb (r1__, r2__, t__);                  \
                                                              \
	r2__ = (x) >> G_SHIFT;                                \
	r3__ = (a) >> G_SHIFT;                                \
	UN8_rb_MUL_UN8_rb (r2__, r3__, t__);                  \
	r3__ = (y) >> G_SHIFT;                                \
	UN8_rb_MUL_UN8 (r3__, (b), t__);                      \
	UN8_rb_ADD_UN8_rb (r2__, r3__, t__);                  \
                                                              \
	(x) = r1__ | (r2__ << G_SHIFT);                       \
    } while (0)

/*
 * x_c = min (x_c + y_c, 255)
 */
#ifndef UN8x4_ADD_UN8x4
#define UN8x4_ADD_UN8x4(x, y)                                 \
    do                                                        \
    {                                                         \
	uint32_t r1__, r2__, r3__, t__;                       \
                                                              \
	r1__ = (x) & RB_MASK;                                 \
	r2__ = (y) & RB_MASK;                                 \
	UN8_rb_ADD_UN8_rb (r1__, r2__, t__);                  \
                                                              \
	r2__ = ((x) >> G_SHIFT) & RB_MASK;                    \
	r3__ = ((y) >> G_SHIFT) & RB_MASK;                    \
	UN8_rb_ADD_UN8_rb (r2__, r3__, t__);                  \
                                                              \
	(x) = r1__ | (r2__ << G_SHIFT);                       \
    } while (0)
#endif
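/*
 * Usage sketch (an illustration, not part of this header): the
 * premultiplied OVER operator,
 * dest = src + dest * (255 - src_alpha) / 255,
 * composes directly from the macros above.  ALPHA_8 (~src) yields
 * 255 - src_alpha, so one UN8x4_MUL_UN8_ADD_UN8x4 does the whole job.
 */
static force_inline uint32_t
over_un8x4 (uint32_t src, uint32_t dest)
{
    uint32_t a = ALPHA_8 (~src);	/* 255 - src_alpha */

    /* dest = dest * a / 255 + src (the add saturates per component) */
    UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);
    return dest;
}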