#define COMPONENT_SIZE 8
#define MASK 0xff
#define ONE_HALF 0x80

#define A_SHIFT 8 * 3
#define R_SHIFT 8 * 2
#define G_SHIFT 8
#define A_MASK 0xff000000
#define R_MASK 0xff0000
#define G_MASK 0xff00

#define RB_MASK 0xff00ff
#define AG_MASK 0xff00ff00
#define RB_ONE_HALF 0x800080
#define RB_MASK_PLUS_ONE 0x10000100

#define ALPHA_8(x) ((x) >> A_SHIFT)
#define RED_8(x) (((x) >> R_SHIFT) & MASK)
#define GREEN_8(x) (((x) >> G_SHIFT) & MASK)
#define BLUE_8(x) ((x) & MASK)
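
/*
 * The shifts and masks above assume 32-bit ARGB pixels: alpha in bits
 * 24-31, red in bits 16-23, green in bits 8-15 and blue in bits 0-7.
 * In the comments below, "x_c" refers to each 8-bit component of x, and
 * "x_rb" to the red/blue pair packed in bits 16-23 and 0-7 of a 32-bit
 * word with the other bytes zero.
 */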

/*
 * ARMv6 has the UQADD8 instruction, which implements unsigned saturating
 * addition of 8-bit values packed in a 32-bit register. It is very useful
 * for the UN8x4_ADD_UN8x4, UN8_rb_ADD_UN8_rb and ADD_UN8 macros, which
 * would otherwise need a lot of arithmetic operations to simulate it.
 * Since most of the major ARM Linux distros are built for ARMv7, we are
 * much less dependent on runtime CPU detection and can get practical
 * benefits from conditional compilation here for a lot of users.
 */

#if defined(USE_GCC_INLINE_ASM) && defined(__arm__) && \
    !defined(__aarch64__) && (!defined(__thumb__) || defined(__thumb2__))
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \
    defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_7__) || \
    defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \
    defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)

static force_inline uint32_t
un8x4_add_un8x4 (uint32_t x, uint32_t y)
{
    uint32_t t;
    asm ("uqadd8 %0, %1, %2" : "=r" (t) : "%r" (x), "r" (y));
    return t;
}

#define UN8x4_ADD_UN8x4(x, y) \
    ((x) = un8x4_add_un8x4 ((x), (y)))

#define UN8_rb_ADD_UN8_rb(x, y, t) \
    ((t) = un8x4_add_un8x4 ((x), (y)), (x) = (t))

#define ADD_UN8(x, y, t) \
    ((t) = (x), un8x4_add_un8x4 ((t), (y)))

#endif
#endif
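
/*
 * When the UQADD8 path above is not taken, portable C fallbacks for
 * ADD_UN8, UN8_rb_ADD_UN8_rb and UN8x4_ADD_UN8x4 are defined further
 * down, guarded by #ifndef.
 */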

/*****************************************************************************/

/*
 * Helper macros.
 */

#define MUL_UN8(a, b, t) \
    ((t) = (a) * (uint16_t)(b) + ONE_HALF, ((((t) >> G_SHIFT) + (t)) >> G_SHIFT))
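
/*
 * MUL_UN8 computes a * b / 255 with rounding: adding ONE_HALF and then
 * folding the high byte of the partial result back in ((t >> G_SHIFT) + t,
 * then >> G_SHIFT) is the usual integer approximation of division by 255,
 * exact for products of two 8-bit values.  DIV_ONE_UN8 below applies the
 * same identity to an already-formed 16-bit intermediate.
 */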

#define DIV_UN8(a, b) \
    (((uint16_t) (a) * MASK + ((b) / 2)) / (b))

#ifndef ADD_UN8
#define ADD_UN8(x, y, t) \
    ((t) = (x) + (y), \
     (uint32_t) (uint8_t) ((t) | (0 - ((t) >> G_SHIFT))))
#endif
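
/*
 * In the C fallback for ADD_UN8, (t >> G_SHIFT) is 1 exactly when the sum
 * overflows 8 bits, so (0 - ((t) >> G_SHIFT)) is an all-ones mask in that
 * case and the OR followed by the uint8_t cast clamps the result to 0xff.
 */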

#define DIV_ONE_UN8(x) \
    (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT)

/*
 * The macros below use some tricks to process two color components
 * at a time.
 */

/*
 * x_rb = (x_rb * a) / 255
 */
#define UN8_rb_MUL_UN8(x, a, t) \
    do \
    { \
        t = ((x) & RB_MASK) * (a); \
        t += RB_ONE_HALF; \
        x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
        x &= RB_MASK; \
    } while (0)
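
/*
 * Both 8 x 8-bit products fit in their 16-bit lanes (0xff * 0xff + 0x80 <
 * 0x10000), so the red (bits 16-23) and blue (bits 0-7) components are
 * scaled by a in a single 32-bit multiplication, followed by the per-lane
 * rounded division by 255.
 */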

/*
 * x_rb = min (x_rb + y_rb, 255)
 */
#ifndef UN8_rb_ADD_UN8_rb
#define UN8_rb_ADD_UN8_rb(x, y, t) \
    do \
    { \
        t = ((x) + (y)); \
        t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \
        x = (t & RB_MASK); \
    } while (0)
#endif
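
/*
 * After the add, bit 8 (blue) and bit 24 (red) of t hold the carry out of
 * each lane.  Subtracting those carries from RB_MASK_PLUS_ONE produces
 * 0xff in every lane that overflowed and leaves the other lanes unchanged,
 * so the OR followed by masking with RB_MASK saturates the result.
 */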

/*
 * x_rb = (x_rb * a_rb) / 255
 */
#define UN8_rb_MUL_UN8_rb(x, a, t) \
    do \
    { \
        t = (x & MASK) * (a & MASK); \
        t |= (x & R_MASK) * ((a >> R_SHIFT) & MASK); \
        t += RB_ONE_HALF; \
        t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
        x = t & RB_MASK; \
    } while (0)
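
/*
 * The blue product lands in bits 0-15 and the red product in bits 16-31:
 * (x & R_MASK) keeps x's red component in place while a's red component is
 * shifted down to 8 bits, so one multiplication per lane suffices before
 * the shared rounded division by 255.
 */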

/*
 * x_c = (x_c * a) / 255
 */
#define UN8x4_MUL_UN8(x, a) \
    do \
    { \
        uint32_t r1__, r2__, t__; \
        \
        r1__ = (x); \
        UN8_rb_MUL_UN8 (r1__, (a), t__); \
        \
        r2__ = (x) >> G_SHIFT; \
        UN8_rb_MUL_UN8 (r2__, (a), t__); \
        \
        (x) = r1__ | (r2__ << G_SHIFT); \
    } while (0)
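
/*
 * The ARGB word is handled as two interleaved red/blue-style pairs:
 * r1__ carries red and blue in place, while r2__ = x >> G_SHIFT moves
 * alpha and green into the same bit positions.  Each pair is scaled with
 * UN8_rb_MUL_UN8 and the halves are recombined at the end.  The remaining
 * UN8x4_* macros follow the same split/recombine pattern.
 */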

/*
 * x_c = (x_c * a) / 255 + y_c
 */
#define UN8x4_MUL_UN8_ADD_UN8x4(x, a, y) \
    do \
    { \
        uint32_t r1__, r2__, r3__, t__; \
        \
        r1__ = (x); \
        r2__ = (y) & RB_MASK; \
        UN8_rb_MUL_UN8 (r1__, (a), t__); \
        UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \
        \
        r2__ = (x) >> G_SHIFT; \
        r3__ = ((y) >> G_SHIFT) & RB_MASK; \
        UN8_rb_MUL_UN8 (r2__, (a), t__); \
        UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \
        \
        (x) = r1__ | (r2__ << G_SHIFT); \
    } while (0)
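
/*
 * The halves of y are masked with RB_MASK explicitly: UN8_rb_MUL_UN8
 * already returns a masked result for x, but UN8_rb_ADD_UN8_rb does not
 * mask its inputs, so y has to arrive with its gap bytes cleared.
 */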

/*
 * x_c = (x_c * a + y_c * b) / 255
 */
#define UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8(x, a, y, b) \
    do \
    { \
        uint32_t r1__, r2__, r3__, t__; \
        \
        r1__ = (x); \
        r2__ = (y); \
        UN8_rb_MUL_UN8 (r1__, (a), t__); \
        UN8_rb_MUL_UN8 (r2__, (b), t__); \
        UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \
        \
        r2__ = ((x) >> G_SHIFT); \
        r3__ = ((y) >> G_SHIFT); \
        UN8_rb_MUL_UN8 (r2__, (a), t__); \
        UN8_rb_MUL_UN8 (r3__, (b), t__); \
        UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \
        \
        (x) = r1__ | (r2__ << G_SHIFT); \
    } while (0)

/*
 * x_c = (x_c * a_c) / 255
 */
#define UN8x4_MUL_UN8x4(x, a) \
    do \
    { \
        uint32_t r1__, r2__, r3__, t__; \
        \
        r1__ = (x); \
        r2__ = (a); \
        UN8_rb_MUL_UN8_rb (r1__, r2__, t__); \
        \
        r2__ = (x) >> G_SHIFT; \
        r3__ = (a) >> G_SHIFT; \
        UN8_rb_MUL_UN8_rb (r2__, r3__, t__); \
        \
        (x) = r1__ | (r2__ << G_SHIFT); \
    } while (0)

/*
 * x_c = (x_c * a_c) / 255 + y_c
 */
#define UN8x4_MUL_UN8x4_ADD_UN8x4(x, a, y) \
    do \
    { \
        uint32_t r1__, r2__, r3__, t__; \
        \
        r1__ = (x); \
        r2__ = (a); \
        UN8_rb_MUL_UN8_rb (r1__, r2__, t__); \
        r2__ = (y) & RB_MASK; \
        UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \
        \
        r2__ = ((x) >> G_SHIFT); \
        r3__ = ((a) >> G_SHIFT); \
        UN8_rb_MUL_UN8_rb (r2__, r3__, t__); \
        r3__ = ((y) >> G_SHIFT) & RB_MASK; \
        UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \
        \
        (x) = r1__ | (r2__ << G_SHIFT); \
    } while (0)

/*
 * x_c = (x_c * a_c + y_c * b) / 255
 */
#define UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8(x, a, y, b) \
    do \
    { \
        uint32_t r1__, r2__, r3__, t__; \
        \
        r1__ = (x); \
        r2__ = (a); \
        UN8_rb_MUL_UN8_rb (r1__, r2__, t__); \
        r2__ = (y); \
        UN8_rb_MUL_UN8 (r2__, (b), t__); \
        UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \
        \
        r2__ = (x) >> G_SHIFT; \
        r3__ = (a) >> G_SHIFT; \
        UN8_rb_MUL_UN8_rb (r2__, r3__, t__); \
        r3__ = (y) >> G_SHIFT; \
        UN8_rb_MUL_UN8 (r3__, (b), t__); \
        UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \
        \
        x = r1__ | (r2__ << G_SHIFT); \
    } while (0)

/*
 * x_c = min (x_c + y_c, 255)
 */
#ifndef UN8x4_ADD_UN8x4
#define UN8x4_ADD_UN8x4(x, y) \
    do \
    { \
        uint32_t r1__, r2__, r3__, t__; \
        \
        r1__ = (x) & RB_MASK; \
        r2__ = (y) & RB_MASK; \
        UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \
        \
        r2__ = ((x) >> G_SHIFT) & RB_MASK; \
        r3__ = ((y) >> G_SHIFT) & RB_MASK; \
        UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \
        \
        x = r1__ | (r2__ << G_SHIFT); \
    } while (0)
#endif
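
/*
 * As an illustration (not part of the original header), an OVER step for
 * premultiplied ARGB pixels can be expressed with these macros.  The helper
 * name below is hypothetical; ALPHA_8 (~src) yields 255 minus the source
 * alpha, so the result is dest * (255 - alpha_src) / 255 + src:
 *
 *     static force_inline uint32_t
 *     over_un8x4 (uint32_t src, uint32_t dest)
 *     {
 *         uint32_t ia = ALPHA_8 (~src);
 *
 *         UN8x4_MUL_UN8_ADD_UN8x4 (dest, ia, src);
 *         return dest;
 *     }
 */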