media/libyuv/source/row_common.cc

changeset 0
6474c204b198
equal deleted inserted replaced
-1:000000000000 0:3d2a8f38760d
1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/row.h"
12
13 #include <string.h> // For memcpy and memset.
14
15 #include "libyuv/basic_types.h"
16
17 #ifdef __cplusplus
18 namespace libyuv {
19 extern "C" {
20 #endif
21
22 // llvm x86 is poor at ternary operator, so use branchless min/max.
23
24 #define USE_BRANCHLESS 1
25 #if USE_BRANCHLESS
26 static __inline int32 clamp0(int32 v) {
27 return ((-(v) >> 31) & (v));
28 }
29
30 static __inline int32 clamp255(int32 v) {
31 return (((255 - (v)) >> 31) | (v)) & 255;
32 }
33
34 static __inline uint32 Clamp(int32 val) {
35 int v = clamp0(val);
36 return (uint32)(clamp255(v));
37 }
38
39 static __inline uint32 Abs(int32 v) {
40 int m = v >> 31;
41 return (v + m) ^ m;
42 }
43 #else // USE_BRANCHLESS
44 static __inline int32 clamp0(int32 v) {
45 return (v < 0) ? 0 : v;
46 }
47
48 static __inline int32 clamp255(int32 v) {
49 return (v > 255) ? 255 : v;
50 }
51
52 static __inline uint32 Clamp(int32 val) {
53 int v = clamp0(val);
54 return (uint32)(clamp255(v));
55 }
56
57 static __inline uint32 Abs(int32 v) {
58 return (v < 0) ? -v : v;
59 }
60 #endif // USE_BRANCHLESS
61
62 #ifdef LIBYUV_LITTLE_ENDIAN
63 #define WRITEWORD(p, v) *(uint32*)(p) = v
64 #else
65 static inline void WRITEWORD(uint8* p, uint32 v) {
66 p[0] = (uint8)(v & 255);
67 p[1] = (uint8)((v >> 8) & 255);
68 p[2] = (uint8)((v >> 16) & 255);
69 p[3] = (uint8)((v >> 24) & 255);
70 }
71 #endif
72
73 void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) {
74 int x;
75 for (x = 0; x < width; ++x) {
76 uint8 b = src_rgb24[0];
77 uint8 g = src_rgb24[1];
78 uint8 r = src_rgb24[2];
79 dst_argb[0] = b;
80 dst_argb[1] = g;
81 dst_argb[2] = r;
82 dst_argb[3] = 255u;
83 dst_argb += 4;
84 src_rgb24 += 3;
85 }
86 }
87
88 void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
89 int x;
90 for (x = 0; x < width; ++x) {
91 uint8 r = src_raw[0];
92 uint8 g = src_raw[1];
93 uint8 b = src_raw[2];
94 dst_argb[0] = b;
95 dst_argb[1] = g;
96 dst_argb[2] = r;
97 dst_argb[3] = 255u;
98 dst_argb += 4;
99 src_raw += 3;
100 }
101 }
102
103 void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
104 int x;
105 for (x = 0; x < width; ++x) {
106 uint8 b = src_rgb565[0] & 0x1f;
107 uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
108 uint8 r = src_rgb565[1] >> 3;
109 dst_argb[0] = (b << 3) | (b >> 2);
110 dst_argb[1] = (g << 2) | (g >> 4);
111 dst_argb[2] = (r << 3) | (r >> 2);
112 dst_argb[3] = 255u;
113 dst_argb += 4;
114 src_rgb565 += 2;
115 }
116 }
117
118 void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb,
119 int width) {
120 int x;
121 for (x = 0; x < width; ++x) {
122 uint8 b = src_argb1555[0] & 0x1f;
123 uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
124 uint8 r = (src_argb1555[1] & 0x7c) >> 2;
125 uint8 a = src_argb1555[1] >> 7;
126 dst_argb[0] = (b << 3) | (b >> 2);
127 dst_argb[1] = (g << 3) | (g >> 2);
128 dst_argb[2] = (r << 3) | (r >> 2);
129 dst_argb[3] = -a;
130 dst_argb += 4;
131 src_argb1555 += 2;
132 }
133 }
134
135 void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb,
136 int width) {
137 int x;
138 for (x = 0; x < width; ++x) {
139 uint8 b = src_argb4444[0] & 0x0f;
140 uint8 g = src_argb4444[0] >> 4;
141 uint8 r = src_argb4444[1] & 0x0f;
142 uint8 a = src_argb4444[1] >> 4;
143 dst_argb[0] = (b << 4) | b;
144 dst_argb[1] = (g << 4) | g;
145 dst_argb[2] = (r << 4) | r;
146 dst_argb[3] = (a << 4) | a;
147 dst_argb += 4;
148 src_argb4444 += 2;
149 }
150 }
151
152 void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
153 int x;
154 for (x = 0; x < width; ++x) {
155 uint8 b = src_argb[0];
156 uint8 g = src_argb[1];
157 uint8 r = src_argb[2];
158 dst_rgb[0] = b;
159 dst_rgb[1] = g;
160 dst_rgb[2] = r;
161 dst_rgb += 3;
162 src_argb += 4;
163 }
164 }
165
166 void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
167 int x;
168 for (x = 0; x < width; ++x) {
169 uint8 b = src_argb[0];
170 uint8 g = src_argb[1];
171 uint8 r = src_argb[2];
172 dst_rgb[0] = r;
173 dst_rgb[1] = g;
174 dst_rgb[2] = b;
175 dst_rgb += 3;
176 src_argb += 4;
177 }
178 }
179
180 void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
181 int x;
182 for (x = 0; x < width - 1; x += 2) {
183 uint8 b0 = src_argb[0] >> 3;
184 uint8 g0 = src_argb[1] >> 2;
185 uint8 r0 = src_argb[2] >> 3;
186 uint8 b1 = src_argb[4] >> 3;
187 uint8 g1 = src_argb[5] >> 2;
188 uint8 r1 = src_argb[6] >> 3;
189 WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) |
190 (b1 << 16) | (g1 << 21) | (r1 << 27));
191 dst_rgb += 4;
192 src_argb += 8;
193 }
194 if (width & 1) {
195 uint8 b0 = src_argb[0] >> 3;
196 uint8 g0 = src_argb[1] >> 2;
197 uint8 r0 = src_argb[2] >> 3;
198 *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
199 }
200 }
201
202 void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
203 int x;
204 for (x = 0; x < width - 1; x += 2) {
205 uint8 b0 = src_argb[0] >> 3;
206 uint8 g0 = src_argb[1] >> 3;
207 uint8 r0 = src_argb[2] >> 3;
208 uint8 a0 = src_argb[3] >> 7;
209 uint8 b1 = src_argb[4] >> 3;
210 uint8 g1 = src_argb[5] >> 3;
211 uint8 r1 = src_argb[6] >> 3;
212 uint8 a1 = src_argb[7] >> 7;
213 *(uint32*)(dst_rgb) =
214 b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
215 (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31);
216 dst_rgb += 4;
217 src_argb += 8;
218 }
219 if (width & 1) {
220 uint8 b0 = src_argb[0] >> 3;
221 uint8 g0 = src_argb[1] >> 3;
222 uint8 r0 = src_argb[2] >> 3;
223 uint8 a0 = src_argb[3] >> 7;
224 *(uint16*)(dst_rgb) =
225 b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
226 }
227 }
228
229 void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
230 int x;
231 for (x = 0; x < width - 1; x += 2) {
232 uint8 b0 = src_argb[0] >> 4;
233 uint8 g0 = src_argb[1] >> 4;
234 uint8 r0 = src_argb[2] >> 4;
235 uint8 a0 = src_argb[3] >> 4;
236 uint8 b1 = src_argb[4] >> 4;
237 uint8 g1 = src_argb[5] >> 4;
238 uint8 r1 = src_argb[6] >> 4;
239 uint8 a1 = src_argb[7] >> 4;
240 *(uint32*)(dst_rgb) =
241 b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
242 (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28);
243 dst_rgb += 4;
244 src_argb += 8;
245 }
246 if (width & 1) {
247 uint8 b0 = src_argb[0] >> 4;
248 uint8 g0 = src_argb[1] >> 4;
249 uint8 r0 = src_argb[2] >> 4;
250 uint8 a0 = src_argb[3] >> 4;
251 *(uint16*)(dst_rgb) =
252 b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
253 }
254 }
255
256 static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
257 return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
258 }
259
260 static __inline int RGBToU(uint8 r, uint8 g, uint8 b) {
261 return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
262 }
263 static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
264 return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
265 }
266
267 #define MAKEROWY(NAME, R, G, B, BPP) \
268 void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \
269 int x; \
270 for (x = 0; x < width; ++x) { \
271 dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
272 src_argb0 += BPP; \
273 dst_y += 1; \
274 } \
275 } \
276 void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \
277 uint8* dst_u, uint8* dst_v, int width) { \
278 const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \
279 int x; \
280 for (x = 0; x < width - 1; x += 2) { \
281 uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] + \
282 src_rgb1[B] + src_rgb1[B + BPP]) >> 2; \
283 uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] + \
284 src_rgb1[G] + src_rgb1[G + BPP]) >> 2; \
285 uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] + \
286 src_rgb1[R] + src_rgb1[R + BPP]) >> 2; \
287 dst_u[0] = RGBToU(ar, ag, ab); \
288 dst_v[0] = RGBToV(ar, ag, ab); \
289 src_rgb0 += BPP * 2; \
290 src_rgb1 += BPP * 2; \
291 dst_u += 1; \
292 dst_v += 1; \
293 } \
294 if (width & 1) { \
295 uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \
296 uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \
297 uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \
298 dst_u[0] = RGBToU(ar, ag, ab); \
299 dst_v[0] = RGBToV(ar, ag, ab); \
300 } \
301 }
302
303 MAKEROWY(ARGB, 2, 1, 0, 4)
304 MAKEROWY(BGRA, 1, 2, 3, 4)
305 MAKEROWY(ABGR, 0, 1, 2, 4)
306 MAKEROWY(RGBA, 3, 2, 1, 4)
307 MAKEROWY(RGB24, 2, 1, 0, 3)
308 MAKEROWY(RAW, 0, 1, 2, 3)
309 #undef MAKEROWY
310
311 // JPeg uses a variation on BT.601-1 full range
312 // y = 0.29900 * r + 0.58700 * g + 0.11400 * b
313 // u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center
314 // v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center
315 // BT.601 Mpeg range uses:
316 // b 0.1016 * 255 = 25.908 = 25
317 // g 0.5078 * 255 = 129.489 = 129
318 // r 0.2578 * 255 = 65.739 = 66
319 // JPeg 8 bit Y (not used):
320 // b 0.11400 * 256 = 29.184 = 29
321 // g 0.58700 * 256 = 150.272 = 150
322 // r 0.29900 * 256 = 76.544 = 77
323 // JPeg 7 bit Y:
324 // b 0.11400 * 128 = 14.592 = 15
325 // g 0.58700 * 128 = 75.136 = 75
326 // r 0.29900 * 128 = 38.272 = 38
327 // JPeg 8 bit U:
328 // b 0.50000 * 255 = 127.5 = 127
329 // g -0.33126 * 255 = -84.4713 = -84
330 // r -0.16874 * 255 = -43.0287 = -43
331 // JPeg 8 bit V:
332 // b -0.08131 * 255 = -20.73405 = -20
333 // g -0.41869 * 255 = -106.76595 = -107
334 // r 0.50000 * 255 = 127.5 = 127
335
336 static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
337 return (38 * r + 75 * g + 15 * b + 64) >> 7;
338 }
339
340 static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) {
341 return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
342 }
343 static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) {
344 return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
345 }
346
347 #define AVGB(a, b) (((a) + (b) + 1) >> 1)
348
349 #define MAKEROWYJ(NAME, R, G, B, BPP) \
350 void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \
351 int x; \
352 for (x = 0; x < width; ++x) { \
353 dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \
354 src_argb0 += BPP; \
355 dst_y += 1; \
356 } \
357 } \
358 void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb, \
359 uint8* dst_u, uint8* dst_v, int width) { \
360 const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \
361 int x; \
362 for (x = 0; x < width - 1; x += 2) { \
363 uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \
364 AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \
365 uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \
366 AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \
367 uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \
368 AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \
369 dst_u[0] = RGBToUJ(ar, ag, ab); \
370 dst_v[0] = RGBToVJ(ar, ag, ab); \
371 src_rgb0 += BPP * 2; \
372 src_rgb1 += BPP * 2; \
373 dst_u += 1; \
374 dst_v += 1; \
375 } \
376 if (width & 1) { \
377 uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]); \
378 uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]); \
379 uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]); \
380 dst_u[0] = RGBToUJ(ar, ag, ab); \
381 dst_v[0] = RGBToVJ(ar, ag, ab); \
382 } \
383 }
384
385 MAKEROWYJ(ARGB, 2, 1, 0, 4)
386 #undef MAKEROWYJ
387
388 void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
389 int x;
390 for (x = 0; x < width; ++x) {
391 uint8 b = src_rgb565[0] & 0x1f;
392 uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
393 uint8 r = src_rgb565[1] >> 3;
394 b = (b << 3) | (b >> 2);
395 g = (g << 2) | (g >> 4);
396 r = (r << 3) | (r >> 2);
397 dst_y[0] = RGBToY(r, g, b);
398 src_rgb565 += 2;
399 dst_y += 1;
400 }
401 }
402
403 void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) {
404 int x;
405 for (x = 0; x < width; ++x) {
406 uint8 b = src_argb1555[0] & 0x1f;
407 uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
408 uint8 r = (src_argb1555[1] & 0x7c) >> 2;
409 b = (b << 3) | (b >> 2);
410 g = (g << 3) | (g >> 2);
411 r = (r << 3) | (r >> 2);
412 dst_y[0] = RGBToY(r, g, b);
413 src_argb1555 += 2;
414 dst_y += 1;
415 }
416 }
417
418 void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {
419 int x;
420 for (x = 0; x < width; ++x) {
421 uint8 b = src_argb4444[0] & 0x0f;
422 uint8 g = src_argb4444[0] >> 4;
423 uint8 r = src_argb4444[1] & 0x0f;
424 b = (b << 4) | b;
425 g = (g << 4) | g;
426 r = (r << 4) | r;
427 dst_y[0] = RGBToY(r, g, b);
428 src_argb4444 += 2;
429 dst_y += 1;
430 }
431 }
432
433 void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
434 uint8* dst_u, uint8* dst_v, int width) {
435 const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565;
436 int x;
437 for (x = 0; x < width - 1; x += 2) {
438 uint8 b0 = src_rgb565[0] & 0x1f;
439 uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
440 uint8 r0 = src_rgb565[1] >> 3;
441 uint8 b1 = src_rgb565[2] & 0x1f;
442 uint8 g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
443 uint8 r1 = src_rgb565[3] >> 3;
444 uint8 b2 = next_rgb565[0] & 0x1f;
445 uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
446 uint8 r2 = next_rgb565[1] >> 3;
447 uint8 b3 = next_rgb565[2] & 0x1f;
448 uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
449 uint8 r3 = next_rgb565[3] >> 3;
450 uint8 b = (b0 + b1 + b2 + b3); // 565 * 4 = 787.
451 uint8 g = (g0 + g1 + g2 + g3);
452 uint8 r = (r0 + r1 + r2 + r3);
453 b = (b << 1) | (b >> 6); // 787 -> 888.
454 r = (r << 1) | (r >> 6);
455 dst_u[0] = RGBToU(r, g, b);
456 dst_v[0] = RGBToV(r, g, b);
457 src_rgb565 += 4;
458 next_rgb565 += 4;
459 dst_u += 1;
460 dst_v += 1;
461 }
462 if (width & 1) {
463 uint8 b0 = src_rgb565[0] & 0x1f;
464 uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
465 uint8 r0 = src_rgb565[1] >> 3;
466 uint8 b2 = next_rgb565[0] & 0x1f;
467 uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
468 uint8 r2 = next_rgb565[1] >> 3;
469 uint8 b = (b0 + b2); // 565 * 2 = 676.
470 uint8 g = (g0 + g2);
471 uint8 r = (r0 + r2);
472 b = (b << 2) | (b >> 4); // 676 -> 888
473 g = (g << 1) | (g >> 6);
474 r = (r << 2) | (r >> 4);
475 dst_u[0] = RGBToU(r, g, b);
476 dst_v[0] = RGBToV(r, g, b);
477 }
478 }
479
480 void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
481 uint8* dst_u, uint8* dst_v, int width) {
482 const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555;
483 int x;
484 for (x = 0; x < width - 1; x += 2) {
485 uint8 b0 = src_argb1555[0] & 0x1f;
486 uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
487 uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
488 uint8 b1 = src_argb1555[2] & 0x1f;
489 uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
490 uint8 r1 = (src_argb1555[3] & 0x7c) >> 2;
491 uint8 b2 = next_argb1555[0] & 0x1f;
492 uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
493 uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
494 uint8 b3 = next_argb1555[2] & 0x1f;
495 uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
496 uint8 r3 = (next_argb1555[3] & 0x7c) >> 2;
497 uint8 b = (b0 + b1 + b2 + b3); // 555 * 4 = 777.
498 uint8 g = (g0 + g1 + g2 + g3);
499 uint8 r = (r0 + r1 + r2 + r3);
500 b = (b << 1) | (b >> 6); // 777 -> 888.
501 g = (g << 1) | (g >> 6);
502 r = (r << 1) | (r >> 6);
503 dst_u[0] = RGBToU(r, g, b);
504 dst_v[0] = RGBToV(r, g, b);
505 src_argb1555 += 4;
506 next_argb1555 += 4;
507 dst_u += 1;
508 dst_v += 1;
509 }
510 if (width & 1) {
511 uint8 b0 = src_argb1555[0] & 0x1f;
512 uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
513 uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
514 uint8 b2 = next_argb1555[0] & 0x1f;
515 uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
516 uint8 r2 = next_argb1555[1] >> 3;
517 uint8 b = (b0 + b2); // 555 * 2 = 666.
518 uint8 g = (g0 + g2);
519 uint8 r = (r0 + r2);
520 b = (b << 2) | (b >> 4); // 666 -> 888.
521 g = (g << 2) | (g >> 4);
522 r = (r << 2) | (r >> 4);
523 dst_u[0] = RGBToU(r, g, b);
524 dst_v[0] = RGBToV(r, g, b);
525 }
526 }
527
528 void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
529 uint8* dst_u, uint8* dst_v, int width) {
530 const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444;
531 int x;
532 for (x = 0; x < width - 1; x += 2) {
533 uint8 b0 = src_argb4444[0] & 0x0f;
534 uint8 g0 = src_argb4444[0] >> 4;
535 uint8 r0 = src_argb4444[1] & 0x0f;
536 uint8 b1 = src_argb4444[2] & 0x0f;
537 uint8 g1 = src_argb4444[2] >> 4;
538 uint8 r1 = src_argb4444[3] & 0x0f;
539 uint8 b2 = next_argb4444[0] & 0x0f;
540 uint8 g2 = next_argb4444[0] >> 4;
541 uint8 r2 = next_argb4444[1] & 0x0f;
542 uint8 b3 = next_argb4444[2] & 0x0f;
543 uint8 g3 = next_argb4444[2] >> 4;
544 uint8 r3 = next_argb4444[3] & 0x0f;
545 uint8 b = (b0 + b1 + b2 + b3); // 444 * 4 = 666.
546 uint8 g = (g0 + g1 + g2 + g3);
547 uint8 r = (r0 + r1 + r2 + r3);
548 b = (b << 2) | (b >> 4); // 666 -> 888.
549 g = (g << 2) | (g >> 4);
550 r = (r << 2) | (r >> 4);
551 dst_u[0] = RGBToU(r, g, b);
552 dst_v[0] = RGBToV(r, g, b);
553 src_argb4444 += 4;
554 next_argb4444 += 4;
555 dst_u += 1;
556 dst_v += 1;
557 }
558 if (width & 1) {
559 uint8 b0 = src_argb4444[0] & 0x0f;
560 uint8 g0 = src_argb4444[0] >> 4;
561 uint8 r0 = src_argb4444[1] & 0x0f;
562 uint8 b2 = next_argb4444[0] & 0x0f;
563 uint8 g2 = next_argb4444[0] >> 4;
564 uint8 r2 = next_argb4444[1] & 0x0f;
565 uint8 b = (b0 + b2); // 444 * 2 = 555.
566 uint8 g = (g0 + g2);
567 uint8 r = (r0 + r2);
568 b = (b << 3) | (b >> 2); // 555 -> 888.
569 g = (g << 3) | (g >> 2);
570 r = (r << 3) | (r >> 2);
571 dst_u[0] = RGBToU(r, g, b);
572 dst_v[0] = RGBToV(r, g, b);
573 }
574 }
575
576 void ARGBToUV444Row_C(const uint8* src_argb,
577 uint8* dst_u, uint8* dst_v, int width) {
578 int x;
579 for (x = 0; x < width; ++x) {
580 uint8 ab = src_argb[0];
581 uint8 ag = src_argb[1];
582 uint8 ar = src_argb[2];
583 dst_u[0] = RGBToU(ar, ag, ab);
584 dst_v[0] = RGBToV(ar, ag, ab);
585 src_argb += 4;
586 dst_u += 1;
587 dst_v += 1;
588 }
589 }
590
591 void ARGBToUV422Row_C(const uint8* src_argb,
592 uint8* dst_u, uint8* dst_v, int width) {
593 int x;
594 for (x = 0; x < width - 1; x += 2) {
595 uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
596 uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
597 uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
598 dst_u[0] = RGBToU(ar, ag, ab);
599 dst_v[0] = RGBToV(ar, ag, ab);
600 src_argb += 8;
601 dst_u += 1;
602 dst_v += 1;
603 }
604 if (width & 1) {
605 uint8 ab = src_argb[0];
606 uint8 ag = src_argb[1];
607 uint8 ar = src_argb[2];
608 dst_u[0] = RGBToU(ar, ag, ab);
609 dst_v[0] = RGBToV(ar, ag, ab);
610 }
611 }
612
613 void ARGBToUV411Row_C(const uint8* src_argb,
614 uint8* dst_u, uint8* dst_v, int width) {
615 int x;
616 for (x = 0; x < width - 3; x += 4) {
617 uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2;
618 uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2;
619 uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2;
620 dst_u[0] = RGBToU(ar, ag, ab);
621 dst_v[0] = RGBToV(ar, ag, ab);
622 src_argb += 16;
623 dst_u += 1;
624 dst_v += 1;
625 }
626 if ((width & 3) == 3) {
627 uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8]) / 3;
628 uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9]) / 3;
629 uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10]) / 3;
630 dst_u[0] = RGBToU(ar, ag, ab);
631 dst_v[0] = RGBToV(ar, ag, ab);
632 } else if ((width & 3) == 2) {
633 uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
634 uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
635 uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
636 dst_u[0] = RGBToU(ar, ag, ab);
637 dst_v[0] = RGBToV(ar, ag, ab);
638 } else if ((width & 3) == 1) {
639 uint8 ab = src_argb[0];
640 uint8 ag = src_argb[1];
641 uint8 ar = src_argb[2];
642 dst_u[0] = RGBToU(ar, ag, ab);
643 dst_v[0] = RGBToV(ar, ag, ab);
644 }
645 }
646
647 void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
648 int x;
649 for (x = 0; x < width; ++x) {
650 uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
651 dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
652 dst_argb[3] = src_argb[3];
653 dst_argb += 4;
654 src_argb += 4;
655 }
656 }
657
658 // Convert a row of image to Sepia tone.
659 void ARGBSepiaRow_C(uint8* dst_argb, int width) {
660 int x;
661 for (x = 0; x < width; ++x) {
662 int b = dst_argb[0];
663 int g = dst_argb[1];
664 int r = dst_argb[2];
665 int sb = (b * 17 + g * 68 + r * 35) >> 7;
666 int sg = (b * 22 + g * 88 + r * 45) >> 7;
667 int sr = (b * 24 + g * 98 + r * 50) >> 7;
668 // b does not over flow. a is preserved from original.
669 dst_argb[0] = sb;
670 dst_argb[1] = clamp255(sg);
671 dst_argb[2] = clamp255(sr);
672 dst_argb += 4;
673 }
674 }
675
676 // Apply color matrix to a row of image. Matrix is signed.
677 // TODO(fbarchard): Consider adding rounding (+32).
678 void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
679 const int8* matrix_argb, int width) {
680 int x;
681 for (x = 0; x < width; ++x) {
682 int b = src_argb[0];
683 int g = src_argb[1];
684 int r = src_argb[2];
685 int a = src_argb[3];
686 int sb = (b * matrix_argb[0] + g * matrix_argb[1] +
687 r * matrix_argb[2] + a * matrix_argb[3]) >> 6;
688 int sg = (b * matrix_argb[4] + g * matrix_argb[5] +
689 r * matrix_argb[6] + a * matrix_argb[7]) >> 6;
690 int sr = (b * matrix_argb[8] + g * matrix_argb[9] +
691 r * matrix_argb[10] + a * matrix_argb[11]) >> 6;
692 int sa = (b * matrix_argb[12] + g * matrix_argb[13] +
693 r * matrix_argb[14] + a * matrix_argb[15]) >> 6;
694 dst_argb[0] = Clamp(sb);
695 dst_argb[1] = Clamp(sg);
696 dst_argb[2] = Clamp(sr);
697 dst_argb[3] = Clamp(sa);
698 src_argb += 4;
699 dst_argb += 4;
700 }
701 }
702
703 // Apply color table to a row of image.
704 void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
705 int x;
706 for (x = 0; x < width; ++x) {
707 int b = dst_argb[0];
708 int g = dst_argb[1];
709 int r = dst_argb[2];
710 int a = dst_argb[3];
711 dst_argb[0] = table_argb[b * 4 + 0];
712 dst_argb[1] = table_argb[g * 4 + 1];
713 dst_argb[2] = table_argb[r * 4 + 2];
714 dst_argb[3] = table_argb[a * 4 + 3];
715 dst_argb += 4;
716 }
717 }
718
719 // Apply color table to a row of image.
720 void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
721 int x;
722 for (x = 0; x < width; ++x) {
723 int b = dst_argb[0];
724 int g = dst_argb[1];
725 int r = dst_argb[2];
726 dst_argb[0] = table_argb[b * 4 + 0];
727 dst_argb[1] = table_argb[g * 4 + 1];
728 dst_argb[2] = table_argb[r * 4 + 2];
729 dst_argb += 4;
730 }
731 }
732
733 void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
734 int interval_offset, int width) {
735 int x;
736 for (x = 0; x < width; ++x) {
737 int b = dst_argb[0];
738 int g = dst_argb[1];
739 int r = dst_argb[2];
740 dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
741 dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset;
742 dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset;
743 dst_argb += 4;
744 }
745 }
746
747 #define REPEAT8(v) (v) | ((v) << 8)
748 #define SHADE(f, v) v * f >> 24
749
750 void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
751 uint32 value) {
752 const uint32 b_scale = REPEAT8(value & 0xff);
753 const uint32 g_scale = REPEAT8((value >> 8) & 0xff);
754 const uint32 r_scale = REPEAT8((value >> 16) & 0xff);
755 const uint32 a_scale = REPEAT8(value >> 24);
756
757 int i;
758 for (i = 0; i < width; ++i) {
759 const uint32 b = REPEAT8(src_argb[0]);
760 const uint32 g = REPEAT8(src_argb[1]);
761 const uint32 r = REPEAT8(src_argb[2]);
762 const uint32 a = REPEAT8(src_argb[3]);
763 dst_argb[0] = SHADE(b, b_scale);
764 dst_argb[1] = SHADE(g, g_scale);
765 dst_argb[2] = SHADE(r, r_scale);
766 dst_argb[3] = SHADE(a, a_scale);
767 src_argb += 4;
768 dst_argb += 4;
769 }
770 }
771 #undef REPEAT8
772 #undef SHADE
773
774 #define REPEAT8(v) (v) | ((v) << 8)
775 #define SHADE(f, v) v * f >> 16
776
777 void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1,
778 uint8* dst_argb, int width) {
779 int i;
780 for (i = 0; i < width; ++i) {
781 const uint32 b = REPEAT8(src_argb0[0]);
782 const uint32 g = REPEAT8(src_argb0[1]);
783 const uint32 r = REPEAT8(src_argb0[2]);
784 const uint32 a = REPEAT8(src_argb0[3]);
785 const uint32 b_scale = src_argb1[0];
786 const uint32 g_scale = src_argb1[1];
787 const uint32 r_scale = src_argb1[2];
788 const uint32 a_scale = src_argb1[3];
789 dst_argb[0] = SHADE(b, b_scale);
790 dst_argb[1] = SHADE(g, g_scale);
791 dst_argb[2] = SHADE(r, r_scale);
792 dst_argb[3] = SHADE(a, a_scale);
793 src_argb0 += 4;
794 src_argb1 += 4;
795 dst_argb += 4;
796 }
797 }
798 #undef REPEAT8
799 #undef SHADE
800
801 #define SHADE(f, v) clamp255(v + f)
802
803 void ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1,
804 uint8* dst_argb, int width) {
805 int i;
806 for (i = 0; i < width; ++i) {
807 const int b = src_argb0[0];
808 const int g = src_argb0[1];
809 const int r = src_argb0[2];
810 const int a = src_argb0[3];
811 const int b_add = src_argb1[0];
812 const int g_add = src_argb1[1];
813 const int r_add = src_argb1[2];
814 const int a_add = src_argb1[3];
815 dst_argb[0] = SHADE(b, b_add);
816 dst_argb[1] = SHADE(g, g_add);
817 dst_argb[2] = SHADE(r, r_add);
818 dst_argb[3] = SHADE(a, a_add);
819 src_argb0 += 4;
820 src_argb1 += 4;
821 dst_argb += 4;
822 }
823 }
824 #undef SHADE
825
826 #define SHADE(f, v) clamp0(f - v)
827
828 void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1,
829 uint8* dst_argb, int width) {
830 int i;
831 for (i = 0; i < width; ++i) {
832 const int b = src_argb0[0];
833 const int g = src_argb0[1];
834 const int r = src_argb0[2];
835 const int a = src_argb0[3];
836 const int b_sub = src_argb1[0];
837 const int g_sub = src_argb1[1];
838 const int r_sub = src_argb1[2];
839 const int a_sub = src_argb1[3];
840 dst_argb[0] = SHADE(b, b_sub);
841 dst_argb[1] = SHADE(g, g_sub);
842 dst_argb[2] = SHADE(r, r_sub);
843 dst_argb[3] = SHADE(a, a_sub);
844 src_argb0 += 4;
845 src_argb1 += 4;
846 dst_argb += 4;
847 }
848 }
849 #undef SHADE
850
851 // Sobel functions which mimics SSSE3.
852 void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
853 uint8* dst_sobelx, int width) {
854 int i;
855 for (i = 0; i < width; ++i) {
856 int a = src_y0[i];
857 int b = src_y1[i];
858 int c = src_y2[i];
859 int a_sub = src_y0[i + 2];
860 int b_sub = src_y1[i + 2];
861 int c_sub = src_y2[i + 2];
862 int a_diff = a - a_sub;
863 int b_diff = b - b_sub;
864 int c_diff = c - c_sub;
865 int sobel = Abs(a_diff + b_diff * 2 + c_diff);
866 dst_sobelx[i] = (uint8)(clamp255(sobel));
867 }
868 }
869
870 void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
871 uint8* dst_sobely, int width) {
872 int i;
873 for (i = 0; i < width; ++i) {
874 int a = src_y0[i + 0];
875 int b = src_y0[i + 1];
876 int c = src_y0[i + 2];
877 int a_sub = src_y1[i + 0];
878 int b_sub = src_y1[i + 1];
879 int c_sub = src_y1[i + 2];
880 int a_diff = a - a_sub;
881 int b_diff = b - b_sub;
882 int c_diff = c - c_sub;
883 int sobel = Abs(a_diff + b_diff * 2 + c_diff);
884 dst_sobely[i] = (uint8)(clamp255(sobel));
885 }
886 }
887
888 void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
889 uint8* dst_argb, int width) {
890 int i;
891 for (i = 0; i < width; ++i) {
892 int r = src_sobelx[i];
893 int b = src_sobely[i];
894 int s = clamp255(r + b);
895 dst_argb[0] = (uint8)(s);
896 dst_argb[1] = (uint8)(s);
897 dst_argb[2] = (uint8)(s);
898 dst_argb[3] = (uint8)(255u);
899 dst_argb += 4;
900 }
901 }
902
903 void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
904 uint8* dst_y, int width) {
905 int i;
906 for (i = 0; i < width; ++i) {
907 int r = src_sobelx[i];
908 int b = src_sobely[i];
909 int s = clamp255(r + b);
910 dst_y[i] = (uint8)(s);
911 }
912 }
913
914 void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
915 uint8* dst_argb, int width) {
916 int i;
917 for (i = 0; i < width; ++i) {
918 int r = src_sobelx[i];
919 int b = src_sobely[i];
920 int g = clamp255(r + b);
921 dst_argb[0] = (uint8)(b);
922 dst_argb[1] = (uint8)(g);
923 dst_argb[2] = (uint8)(r);
924 dst_argb[3] = (uint8)(255u);
925 dst_argb += 4;
926 }
927 }
928
929 void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
930 // Copy a Y to RGB.
931 int x;
932 for (x = 0; x < width; ++x) {
933 uint8 y = src_y[0];
934 dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
935 dst_argb[3] = 255u;
936 dst_argb += 4;
937 ++src_y;
938 }
939 }
940
941 // C reference code that mimics the YUV assembly.
942
943 #define YG 74 /* (int8)(1.164 * 64 + 0.5) */
944
945 #define UB 127 /* min(63,(int8)(2.018 * 64)) */
946 #define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
947 #define UR 0
948
949 #define VB 0
950 #define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
951 #define VR 102 /* (int8)(1.596 * 64 + 0.5) */
952
953 // Bias
954 #define BB UB * 128 + VB * 128
955 #define BG UG * 128 + VG * 128
956 #define BR UR * 128 + VR * 128
957
958 static __inline void YuvPixel(uint8 y, uint8 u, uint8 v,
959 uint8* b, uint8* g, uint8* r) {
960 int32 y1 = ((int32)(y) - 16) * YG;
961 *b = Clamp((int32)((u * UB + v * VB) - (BB) + y1) >> 6);
962 *g = Clamp((int32)((u * UG + v * VG) - (BG) + y1) >> 6);
963 *r = Clamp((int32)((u * UR + v * VR) - (BR) + y1) >> 6);
964 }
965
966 #if !defined(LIBYUV_DISABLE_NEON) && \
967 (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
968 // C mimic assembly.
969 // TODO(fbarchard): Remove subsampling from Neon.
970 void I444ToARGBRow_C(const uint8* src_y,
971 const uint8* src_u,
972 const uint8* src_v,
973 uint8* rgb_buf,
974 int width) {
975 int x;
976 for (x = 0; x < width - 1; x += 2) {
977 uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
978 uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
979 YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
980 rgb_buf[3] = 255;
981 YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
982 rgb_buf[7] = 255;
983 src_y += 2;
984 src_u += 2;
985 src_v += 2;
986 rgb_buf += 8; // Advance 2 pixels.
987 }
988 if (width & 1) {
989 YuvPixel(src_y[0], src_u[0], src_v[0],
990 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
991 }
992 }
993 #else
994 void I444ToARGBRow_C(const uint8* src_y,
995 const uint8* src_u,
996 const uint8* src_v,
997 uint8* rgb_buf,
998 int width) {
999 int x;
1000 for (x = 0; x < width; ++x) {
1001 YuvPixel(src_y[0], src_u[0], src_v[0],
1002 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1003 rgb_buf[3] = 255;
1004 src_y += 1;
1005 src_u += 1;
1006 src_v += 1;
1007 rgb_buf += 4; // Advance 1 pixel.
1008 }
1009 }
1010 #endif
1011 // Also used for 420
1012 void I422ToARGBRow_C(const uint8* src_y,
1013 const uint8* src_u,
1014 const uint8* src_v,
1015 uint8* rgb_buf,
1016 int width) {
1017 int x;
1018 for (x = 0; x < width - 1; x += 2) {
1019 YuvPixel(src_y[0], src_u[0], src_v[0],
1020 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1021 rgb_buf[3] = 255;
1022 YuvPixel(src_y[1], src_u[0], src_v[0],
1023 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1024 rgb_buf[7] = 255;
1025 src_y += 2;
1026 src_u += 1;
1027 src_v += 1;
1028 rgb_buf += 8; // Advance 2 pixels.
1029 }
1030 if (width & 1) {
1031 YuvPixel(src_y[0], src_u[0], src_v[0],
1032 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1033 rgb_buf[3] = 255;
1034 }
1035 }
1036
1037 void I422ToRGB24Row_C(const uint8* src_y,
1038 const uint8* src_u,
1039 const uint8* src_v,
1040 uint8* rgb_buf,
1041 int width) {
1042 int x;
1043 for (x = 0; x < width - 1; x += 2) {
1044 YuvPixel(src_y[0], src_u[0], src_v[0],
1045 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1046 YuvPixel(src_y[1], src_u[0], src_v[0],
1047 rgb_buf + 3, rgb_buf + 4, rgb_buf + 5);
1048 src_y += 2;
1049 src_u += 1;
1050 src_v += 1;
1051 rgb_buf += 6; // Advance 2 pixels.
1052 }
1053 if (width & 1) {
1054 YuvPixel(src_y[0], src_u[0], src_v[0],
1055 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1056 }
1057 }
1058
1059 void I422ToRAWRow_C(const uint8* src_y,
1060 const uint8* src_u,
1061 const uint8* src_v,
1062 uint8* rgb_buf,
1063 int width) {
1064 int x;
1065 for (x = 0; x < width - 1; x += 2) {
1066 YuvPixel(src_y[0], src_u[0], src_v[0],
1067 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
1068 YuvPixel(src_y[1], src_u[0], src_v[0],
1069 rgb_buf + 5, rgb_buf + 4, rgb_buf + 3);
1070 src_y += 2;
1071 src_u += 1;
1072 src_v += 1;
1073 rgb_buf += 6; // Advance 2 pixels.
1074 }
1075 if (width & 1) {
1076 YuvPixel(src_y[0], src_u[0], src_v[0],
1077 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
1078 }
1079 }
1080
1081 void I422ToARGB4444Row_C(const uint8* src_y,
1082 const uint8* src_u,
1083 const uint8* src_v,
1084 uint8* dst_argb4444,
1085 int width) {
1086 uint8 b0;
1087 uint8 g0;
1088 uint8 r0;
1089 uint8 b1;
1090 uint8 g1;
1091 uint8 r1;
1092 int x;
1093 for (x = 0; x < width - 1; x += 2) {
1094 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1095 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
1096 b0 = b0 >> 4;
1097 g0 = g0 >> 4;
1098 r0 = r0 >> 4;
1099 b1 = b1 >> 4;
1100 g1 = g1 >> 4;
1101 r1 = r1 >> 4;
1102 *(uint32*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
1103 (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000;
1104 src_y += 2;
1105 src_u += 1;
1106 src_v += 1;
1107 dst_argb4444 += 4; // Advance 2 pixels.
1108 }
1109 if (width & 1) {
1110 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1111 b0 = b0 >> 4;
1112 g0 = g0 >> 4;
1113 r0 = r0 >> 4;
1114 *(uint16*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
1115 0xf000;
1116 }
1117 }
1118
1119 void I422ToARGB1555Row_C(const uint8* src_y,
1120 const uint8* src_u,
1121 const uint8* src_v,
1122 uint8* dst_argb1555,
1123 int width) {
1124 uint8 b0;
1125 uint8 g0;
1126 uint8 r0;
1127 uint8 b1;
1128 uint8 g1;
1129 uint8 r1;
1130 int x;
1131 for (x = 0; x < width - 1; x += 2) {
1132 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1133 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
1134 b0 = b0 >> 3;
1135 g0 = g0 >> 3;
1136 r0 = r0 >> 3;
1137 b1 = b1 >> 3;
1138 g1 = g1 >> 3;
1139 r1 = r1 >> 3;
1140 *(uint32*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
1141 (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000;
1142 src_y += 2;
1143 src_u += 1;
1144 src_v += 1;
1145 dst_argb1555 += 4; // Advance 2 pixels.
1146 }
1147 if (width & 1) {
1148 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1149 b0 = b0 >> 3;
1150 g0 = g0 >> 3;
1151 r0 = r0 >> 3;
1152 *(uint16*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
1153 0x8000;
1154 }
1155 }
1156
1157 void I422ToRGB565Row_C(const uint8* src_y,
1158 const uint8* src_u,
1159 const uint8* src_v,
1160 uint8* dst_rgb565,
1161 int width) {
1162 uint8 b0;
1163 uint8 g0;
1164 uint8 r0;
1165 uint8 b1;
1166 uint8 g1;
1167 uint8 r1;
1168 int x;
1169 for (x = 0; x < width - 1; x += 2) {
1170 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1171 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
1172 b0 = b0 >> 3;
1173 g0 = g0 >> 2;
1174 r0 = r0 >> 3;
1175 b1 = b1 >> 3;
1176 g1 = g1 >> 2;
1177 r1 = r1 >> 3;
1178 *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
1179 (b1 << 16) | (g1 << 21) | (r1 << 27);
1180 src_y += 2;
1181 src_u += 1;
1182 src_v += 1;
1183 dst_rgb565 += 4; // Advance 2 pixels.
1184 }
1185 if (width & 1) {
1186 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
1187 b0 = b0 >> 3;
1188 g0 = g0 >> 2;
1189 r0 = r0 >> 3;
1190 *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
1191 }
1192 }
1193
1194 void I411ToARGBRow_C(const uint8* src_y,
1195 const uint8* src_u,
1196 const uint8* src_v,
1197 uint8* rgb_buf,
1198 int width) {
1199 int x;
1200 for (x = 0; x < width - 3; x += 4) {
1201 YuvPixel(src_y[0], src_u[0], src_v[0],
1202 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1203 rgb_buf[3] = 255;
1204 YuvPixel(src_y[1], src_u[0], src_v[0],
1205 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1206 rgb_buf[7] = 255;
1207 YuvPixel(src_y[2], src_u[0], src_v[0],
1208 rgb_buf + 8, rgb_buf + 9, rgb_buf + 10);
1209 rgb_buf[11] = 255;
1210 YuvPixel(src_y[3], src_u[0], src_v[0],
1211 rgb_buf + 12, rgb_buf + 13, rgb_buf + 14);
1212 rgb_buf[15] = 255;
1213 src_y += 4;
1214 src_u += 1;
1215 src_v += 1;
1216 rgb_buf += 16; // Advance 4 pixels.
1217 }
1218 if (width & 2) {
1219 YuvPixel(src_y[0], src_u[0], src_v[0],
1220 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1221 rgb_buf[3] = 255;
1222 YuvPixel(src_y[1], src_u[0], src_v[0],
1223 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1224 rgb_buf[7] = 255;
1225 src_y += 2;
1226 rgb_buf += 8; // Advance 2 pixels.
1227 }
1228 if (width & 1) {
1229 YuvPixel(src_y[0], src_u[0], src_v[0],
1230 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1231 rgb_buf[3] = 255;
1232 }
1233 }
1234
1235 void NV12ToARGBRow_C(const uint8* src_y,
1236 const uint8* usrc_v,
1237 uint8* rgb_buf,
1238 int width) {
1239 int x;
1240 for (x = 0; x < width - 1; x += 2) {
1241 YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
1242 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1243 rgb_buf[3] = 255;
1244 YuvPixel(src_y[1], usrc_v[0], usrc_v[1],
1245 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1246 rgb_buf[7] = 255;
1247 src_y += 2;
1248 usrc_v += 2;
1249 rgb_buf += 8; // Advance 2 pixels.
1250 }
1251 if (width & 1) {
1252 YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
1253 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1254 rgb_buf[3] = 255;
1255 }
1256 }
1257
1258 void NV21ToARGBRow_C(const uint8* src_y,
1259 const uint8* src_vu,
1260 uint8* rgb_buf,
1261 int width) {
1262 int x;
1263 for (x = 0; x < width - 1; x += 2) {
1264 YuvPixel(src_y[0], src_vu[1], src_vu[0],
1265 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1266 rgb_buf[3] = 255;
1267
1268 YuvPixel(src_y[1], src_vu[1], src_vu[0],
1269 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1270 rgb_buf[7] = 255;
1271
1272 src_y += 2;
1273 src_vu += 2;
1274 rgb_buf += 8; // Advance 2 pixels.
1275 }
1276 if (width & 1) {
1277 YuvPixel(src_y[0], src_vu[1], src_vu[0],
1278 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1279 rgb_buf[3] = 255;
1280 }
1281 }
1282
1283 void NV12ToRGB565Row_C(const uint8* src_y,
1284 const uint8* usrc_v,
1285 uint8* dst_rgb565,
1286 int width) {
1287 uint8 b0;
1288 uint8 g0;
1289 uint8 r0;
1290 uint8 b1;
1291 uint8 g1;
1292 uint8 r1;
1293 int x;
1294 for (x = 0; x < width - 1; x += 2) {
1295 YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
1296 YuvPixel(src_y[1], usrc_v[0], usrc_v[1], &b1, &g1, &r1);
1297 b0 = b0 >> 3;
1298 g0 = g0 >> 2;
1299 r0 = r0 >> 3;
1300 b1 = b1 >> 3;
1301 g1 = g1 >> 2;
1302 r1 = r1 >> 3;
1303 *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
1304 (b1 << 16) | (g1 << 21) | (r1 << 27);
1305 src_y += 2;
1306 usrc_v += 2;
1307 dst_rgb565 += 4; // Advance 2 pixels.
1308 }
1309 if (width & 1) {
1310 YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
1311 b0 = b0 >> 3;
1312 g0 = g0 >> 2;
1313 r0 = r0 >> 3;
1314 *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
1315 }
1316 }
1317
1318 void NV21ToRGB565Row_C(const uint8* src_y,
1319 const uint8* vsrc_u,
1320 uint8* dst_rgb565,
1321 int width) {
1322 uint8 b0;
1323 uint8 g0;
1324 uint8 r0;
1325 uint8 b1;
1326 uint8 g1;
1327 uint8 r1;
1328 int x;
1329 for (x = 0; x < width - 1; x += 2) {
1330 YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
1331 YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1);
1332 b0 = b0 >> 3;
1333 g0 = g0 >> 2;
1334 r0 = r0 >> 3;
1335 b1 = b1 >> 3;
1336 g1 = g1 >> 2;
1337 r1 = r1 >> 3;
1338 *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
1339 (b1 << 16) | (g1 << 21) | (r1 << 27);
1340 src_y += 2;
1341 vsrc_u += 2;
1342 dst_rgb565 += 4; // Advance 2 pixels.
1343 }
1344 if (width & 1) {
1345 YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
1346 b0 = b0 >> 3;
1347 g0 = g0 >> 2;
1348 r0 = r0 >> 3;
1349 *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
1350 }
1351 }
1352
1353 void YUY2ToARGBRow_C(const uint8* src_yuy2,
1354 uint8* rgb_buf,
1355 int width) {
1356 int x;
1357 for (x = 0; x < width - 1; x += 2) {
1358 YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
1359 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1360 rgb_buf[3] = 255;
1361 YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3],
1362 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1363 rgb_buf[7] = 255;
1364 src_yuy2 += 4;
1365 rgb_buf += 8; // Advance 2 pixels.
1366 }
1367 if (width & 1) {
1368 YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
1369 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1370 rgb_buf[3] = 255;
1371 }
1372 }
1373
1374 void UYVYToARGBRow_C(const uint8* src_uyvy,
1375 uint8* rgb_buf,
1376 int width) {
1377 int x;
1378 for (x = 0; x < width - 1; x += 2) {
1379 YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
1380 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1381 rgb_buf[3] = 255;
1382 YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2],
1383 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1384 rgb_buf[7] = 255;
1385 src_uyvy += 4;
1386 rgb_buf += 8; // Advance 2 pixels.
1387 }
1388 if (width & 1) {
1389 YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
1390 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1391 rgb_buf[3] = 255;
1392 }
1393 }
1394
1395 void I422ToBGRARow_C(const uint8* src_y,
1396 const uint8* src_u,
1397 const uint8* src_v,
1398 uint8* rgb_buf,
1399 int width) {
1400 int x;
1401 for (x = 0; x < width - 1; x += 2) {
1402 YuvPixel(src_y[0], src_u[0], src_v[0],
1403 rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
1404 rgb_buf[0] = 255;
1405 YuvPixel(src_y[1], src_u[0], src_v[0],
1406 rgb_buf + 7, rgb_buf + 6, rgb_buf + 5);
1407 rgb_buf[4] = 255;
1408 src_y += 2;
1409 src_u += 1;
1410 src_v += 1;
1411 rgb_buf += 8; // Advance 2 pixels.
1412 }
1413 if (width & 1) {
1414 YuvPixel(src_y[0], src_u[0], src_v[0],
1415 rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
1416 rgb_buf[0] = 255;
1417 }
1418 }
1419
1420 void I422ToABGRRow_C(const uint8* src_y,
1421 const uint8* src_u,
1422 const uint8* src_v,
1423 uint8* rgb_buf,
1424 int width) {
1425 int x;
1426 for (x = 0; x < width - 1; x += 2) {
1427 YuvPixel(src_y[0], src_u[0], src_v[0],
1428 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
1429 rgb_buf[3] = 255;
1430 YuvPixel(src_y[1], src_u[0], src_v[0],
1431 rgb_buf + 6, rgb_buf + 5, rgb_buf + 4);
1432 rgb_buf[7] = 255;
1433 src_y += 2;
1434 src_u += 1;
1435 src_v += 1;
1436 rgb_buf += 8; // Advance 2 pixels.
1437 }
1438 if (width & 1) {
1439 YuvPixel(src_y[0], src_u[0], src_v[0],
1440 rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
1441 rgb_buf[3] = 255;
1442 }
1443 }
1444
1445 void I422ToRGBARow_C(const uint8* src_y,
1446 const uint8* src_u,
1447 const uint8* src_v,
1448 uint8* rgb_buf,
1449 int width) {
1450 int x;
1451 for (x = 0; x < width - 1; x += 2) {
1452 YuvPixel(src_y[0], src_u[0], src_v[0],
1453 rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
1454 rgb_buf[0] = 255;
1455 YuvPixel(src_y[1], src_u[0], src_v[0],
1456 rgb_buf + 5, rgb_buf + 6, rgb_buf + 7);
1457 rgb_buf[4] = 255;
1458 src_y += 2;
1459 src_u += 1;
1460 src_v += 1;
1461 rgb_buf += 8; // Advance 2 pixels.
1462 }
1463 if (width & 1) {
1464 YuvPixel(src_y[0], src_u[0], src_v[0],
1465 rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
1466 rgb_buf[0] = 255;
1467 }
1468 }
1469
1470 void YToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
1471 int x;
1472 for (x = 0; x < width - 1; x += 2) {
1473 YuvPixel(src_y[0], 128, 128,
1474 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1475 rgb_buf[3] = 255;
1476 YuvPixel(src_y[1], 128, 128,
1477 rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
1478 rgb_buf[7] = 255;
1479 src_y += 2;
1480 rgb_buf += 8; // Advance 2 pixels.
1481 }
1482 if (width & 1) {
1483 YuvPixel(src_y[0], 128, 128,
1484 rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
1485 rgb_buf[3] = 255;
1486 }
1487 }
1488
1489 void MirrorRow_C(const uint8* src, uint8* dst, int width) {
1490 int x;
1491 src += width - 1;
1492 for (x = 0; x < width - 1; x += 2) {
1493 dst[x] = src[0];
1494 dst[x + 1] = src[-1];
1495 src -= 2;
1496 }
1497 if (width & 1) {
1498 dst[width - 1] = src[0];
1499 }
1500 }
1501
1502 void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
1503 int x;
1504 src_uv += (width - 1) << 1;
1505 for (x = 0; x < width - 1; x += 2) {
1506 dst_u[x] = src_uv[0];
1507 dst_u[x + 1] = src_uv[-2];
1508 dst_v[x] = src_uv[1];
1509 dst_v[x + 1] = src_uv[-2 + 1];
1510 src_uv -= 4;
1511 }
1512 if (width & 1) {
1513 dst_u[width - 1] = src_uv[0];
1514 dst_v[width - 1] = src_uv[1];
1515 }
1516 }
1517
1518 void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
1519 int x;
1520 const uint32* src32 = (const uint32*)(src);
1521 uint32* dst32 = (uint32*)(dst);
1522 src32 += width - 1;
1523 for (x = 0; x < width - 1; x += 2) {
1524 dst32[x] = src32[0];
1525 dst32[x + 1] = src32[-1];
1526 src32 -= 2;
1527 }
1528 if (width & 1) {
1529 dst32[width - 1] = src32[0];
1530 }
1531 }
1532
1533 void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
1534 int x;
1535 for (x = 0; x < width - 1; x += 2) {
1536 dst_u[x] = src_uv[0];
1537 dst_u[x + 1] = src_uv[2];
1538 dst_v[x] = src_uv[1];
1539 dst_v[x + 1] = src_uv[3];
1540 src_uv += 4;
1541 }
1542 if (width & 1) {
1543 dst_u[width - 1] = src_uv[0];
1544 dst_v[width - 1] = src_uv[1];
1545 }
1546 }
1547
1548 void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
1549 int width) {
1550 int x;
1551 for (x = 0; x < width - 1; x += 2) {
1552 dst_uv[0] = src_u[x];
1553 dst_uv[1] = src_v[x];
1554 dst_uv[2] = src_u[x + 1];
1555 dst_uv[3] = src_v[x + 1];
1556 dst_uv += 4;
1557 }
1558 if (width & 1) {
1559 dst_uv[0] = src_u[width - 1];
1560 dst_uv[1] = src_v[width - 1];
1561 }
1562 }
1563
1564 void CopyRow_C(const uint8* src, uint8* dst, int count) {
1565 memcpy(dst, src, count);
1566 }
1567
1568 void SetRow_C(uint8* dst, uint32 v8, int count) {
1569 #ifdef _MSC_VER
1570 // VC will generate rep stosb.
1571 int x;
1572 for (x = 0; x < count; ++x) {
1573 dst[x] = v8;
1574 }
1575 #else
1576 memset(dst, v8, count);
1577 #endif
1578 }
1579
1580 void ARGBSetRows_C(uint8* dst, uint32 v32, int width,
1581 int dst_stride, int height) {
1582 int y;
1583 for (y = 0; y < height; ++y) {
1584 uint32* d = (uint32*)(dst);
1585 int x;
1586 for (x = 0; x < width; ++x) {
1587 d[x] = v32;
1588 }
1589 dst += dst_stride;
1590 }
1591 }
1592
1593 // Filter 2 rows of YUY2 UV's (422) into U and V (420).
1594 void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
1595 uint8* dst_u, uint8* dst_v, int width) {
1596 // Output a row of UV values, filtering 2 rows of YUY2.
1597 int x;
1598 for (x = 0; x < width; x += 2) {
1599 dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
1600 dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
1601 src_yuy2 += 4;
1602 dst_u += 1;
1603 dst_v += 1;
1604 }
1605 }
1606
1607 // Copy row of YUY2 UV's (422) into U and V (422).
1608 void YUY2ToUV422Row_C(const uint8* src_yuy2,
1609 uint8* dst_u, uint8* dst_v, int width) {
1610 // Output a row of UV values.
1611 int x;
1612 for (x = 0; x < width; x += 2) {
1613 dst_u[0] = src_yuy2[1];
1614 dst_v[0] = src_yuy2[3];
1615 src_yuy2 += 4;
1616 dst_u += 1;
1617 dst_v += 1;
1618 }
1619 }
1620
1621 // Copy row of YUY2 Y's (422) into Y (420/422).
1622 void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
1623 // Output a row of Y values.
1624 int x;
1625 for (x = 0; x < width - 1; x += 2) {
1626 dst_y[x] = src_yuy2[0];
1627 dst_y[x + 1] = src_yuy2[2];
1628 src_yuy2 += 4;
1629 }
1630 if (width & 1) {
1631 dst_y[width - 1] = src_yuy2[0];
1632 }
1633 }
1634
1635 // Filter 2 rows of UYVY UV's (422) into U and V (420).
1636 void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
1637 uint8* dst_u, uint8* dst_v, int width) {
1638 // Output a row of UV values.
1639 int x;
1640 for (x = 0; x < width; x += 2) {
1641 dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
1642 dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
1643 src_uyvy += 4;
1644 dst_u += 1;
1645 dst_v += 1;
1646 }
1647 }
1648
1649 // Copy row of UYVY UV's (422) into U and V (422).
1650 void UYVYToUV422Row_C(const uint8* src_uyvy,
1651 uint8* dst_u, uint8* dst_v, int width) {
1652 // Output a row of UV values.
1653 int x;
1654 for (x = 0; x < width; x += 2) {
1655 dst_u[0] = src_uyvy[0];
1656 dst_v[0] = src_uyvy[2];
1657 src_uyvy += 4;
1658 dst_u += 1;
1659 dst_v += 1;
1660 }
1661 }
1662
1663 // Copy row of UYVY Y's (422) into Y (420/422).
1664 void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {
1665 // Output a row of Y values.
1666 int x;
1667 for (x = 0; x < width - 1; x += 2) {
1668 dst_y[x] = src_uyvy[1];
1669 dst_y[x + 1] = src_uyvy[3];
1670 src_uyvy += 4;
1671 }
1672 if (width & 1) {
1673 dst_y[width - 1] = src_uyvy[1];
1674 }
1675 }
1676
1677 #define BLEND(f, b, a) (((256 - a) * b) >> 8) + f
1678
1679 // Blend src_argb0 over src_argb1 and store to dst_argb.
1680 // dst_argb may be src_argb0 or src_argb1.
1681 // This code mimics the SSSE3 version for better testability.
1682 void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,
1683 uint8* dst_argb, int width) {
1684 int x;
1685 for (x = 0; x < width - 1; x += 2) {
1686 uint32 fb = src_argb0[0];
1687 uint32 fg = src_argb0[1];
1688 uint32 fr = src_argb0[2];
1689 uint32 a = src_argb0[3];
1690 uint32 bb = src_argb1[0];
1691 uint32 bg = src_argb1[1];
1692 uint32 br = src_argb1[2];
1693 dst_argb[0] = BLEND(fb, bb, a);
1694 dst_argb[1] = BLEND(fg, bg, a);
1695 dst_argb[2] = BLEND(fr, br, a);
1696 dst_argb[3] = 255u;
1697
1698 fb = src_argb0[4 + 0];
1699 fg = src_argb0[4 + 1];
1700 fr = src_argb0[4 + 2];
1701 a = src_argb0[4 + 3];
1702 bb = src_argb1[4 + 0];
1703 bg = src_argb1[4 + 1];
1704 br = src_argb1[4 + 2];
1705 dst_argb[4 + 0] = BLEND(fb, bb, a);
1706 dst_argb[4 + 1] = BLEND(fg, bg, a);
1707 dst_argb[4 + 2] = BLEND(fr, br, a);
1708 dst_argb[4 + 3] = 255u;
1709 src_argb0 += 8;
1710 src_argb1 += 8;
1711 dst_argb += 8;
1712 }
1713
1714 if (width & 1) {
1715 uint32 fb = src_argb0[0];
1716 uint32 fg = src_argb0[1];
1717 uint32 fr = src_argb0[2];
1718 uint32 a = src_argb0[3];
1719 uint32 bb = src_argb1[0];
1720 uint32 bg = src_argb1[1];
1721 uint32 br = src_argb1[2];
1722 dst_argb[0] = BLEND(fb, bb, a);
1723 dst_argb[1] = BLEND(fg, bg, a);
1724 dst_argb[2] = BLEND(fr, br, a);
1725 dst_argb[3] = 255u;
1726 }
1727 }
1728 #undef BLEND
1729 #define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
1730
1731 // Multiply source RGB by alpha and store to destination.
1732 // This code mimics the SSSE3 version for better testability.
1733 void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
1734 int i;
1735 for (i = 0; i < width - 1; i += 2) {
1736 uint32 b = src_argb[0];
1737 uint32 g = src_argb[1];
1738 uint32 r = src_argb[2];
1739 uint32 a = src_argb[3];
1740 dst_argb[0] = ATTENUATE(b, a);
1741 dst_argb[1] = ATTENUATE(g, a);
1742 dst_argb[2] = ATTENUATE(r, a);
1743 dst_argb[3] = a;
1744 b = src_argb[4];
1745 g = src_argb[5];
1746 r = src_argb[6];
1747 a = src_argb[7];
1748 dst_argb[4] = ATTENUATE(b, a);
1749 dst_argb[5] = ATTENUATE(g, a);
1750 dst_argb[6] = ATTENUATE(r, a);
1751 dst_argb[7] = a;
1752 src_argb += 8;
1753 dst_argb += 8;
1754 }
1755
1756 if (width & 1) {
1757 const uint32 b = src_argb[0];
1758 const uint32 g = src_argb[1];
1759 const uint32 r = src_argb[2];
1760 const uint32 a = src_argb[3];
1761 dst_argb[0] = ATTENUATE(b, a);
1762 dst_argb[1] = ATTENUATE(g, a);
1763 dst_argb[2] = ATTENUATE(r, a);
1764 dst_argb[3] = a;
1765 }
1766 }
1767 #undef ATTENUATE
1768
1769 // Divide source RGB by alpha and store to destination.
1770 // b = (b * 255 + (a / 2)) / a;
1771 // g = (g * 255 + (a / 2)) / a;
1772 // r = (r * 255 + (a / 2)) / a;
1773 // Reciprocal method is off by 1 on some values. ie 125
1774 // 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
1775 #define T(a) 0x01000000 + (0x10000 / a)
1776 const uint32 fixed_invtbl8[256] = {
1777 0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
1778 T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),
1779 T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),
1780 T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),
1781 T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27),
1782 T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f),
1783 T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
1784 T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f),
1785 T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47),
1786 T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f),
1787 T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57),
1788 T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f),
1789 T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67),
1790 T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
1791 T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77),
1792 T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f),
1793 T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87),
1794 T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f),
1795 T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97),
1796 T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f),
1797 T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
1798 T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf),
1799 T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7),
1800 T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf),
1801 T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7),
1802 T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf),
1803 T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7),
1804 T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
1805 T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7),
1806 T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef),
1807 T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7),
1808 T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x01000100 };
1809 #undef T
1810
1811 void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
1812 int i;
1813 for (i = 0; i < width; ++i) {
1814 uint32 b = src_argb[0];
1815 uint32 g = src_argb[1];
1816 uint32 r = src_argb[2];
1817 const uint32 a = src_argb[3];
1818 const uint32 ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point
1819 b = (b * ia) >> 8;
1820 g = (g * ia) >> 8;
1821 r = (r * ia) >> 8;
1822 // Clamping should not be necessary but is free in assembly.
1823 dst_argb[0] = clamp255(b);
1824 dst_argb[1] = clamp255(g);
1825 dst_argb[2] = clamp255(r);
1826 dst_argb[3] = a;
1827 src_argb += 4;
1828 dst_argb += 4;
1829 }
1830 }
1831
1832 void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
1833 const int32* previous_cumsum, int width) {
1834 int32 row_sum[4] = {0, 0, 0, 0};
1835 int x;
1836 for (x = 0; x < width; ++x) {
1837 row_sum[0] += row[x * 4 + 0];
1838 row_sum[1] += row[x * 4 + 1];
1839 row_sum[2] += row[x * 4 + 2];
1840 row_sum[3] += row[x * 4 + 3];
1841 cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0];
1842 cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1];
1843 cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2];
1844 cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3];
1845 }
1846 }
1847
1848 void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl,
1849 int w, int area, uint8* dst, int count) {
1850 float ooa = 1.0f / area;
1851 int i;
1852 for (i = 0; i < count; ++i) {
1853 dst[0] = (uint8)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
1854 dst[1] = (uint8)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);
1855 dst[2] = (uint8)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa);
1856 dst[3] = (uint8)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa);
1857 dst += 4;
1858 tl += 4;
1859 bl += 4;
1860 }
1861 }
1862
1863 // Copy pixels from rotated source to destination row with a slope.
1864 LIBYUV_API
1865 void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
1866 uint8* dst_argb, const float* uv_dudv, int width) {
1867 int i;
1868 // Render a row of pixels from source into a buffer.
1869 float uv[2];
1870 uv[0] = uv_dudv[0];
1871 uv[1] = uv_dudv[1];
1872 for (i = 0; i < width; ++i) {
1873 int x = (int)(uv[0]);
1874 int y = (int)(uv[1]);
1875 *(uint32*)(dst_argb) =
1876 *(const uint32*)(src_argb + y * src_argb_stride +
1877 x * 4);
1878 dst_argb += 4;
1879 uv[0] += uv_dudv[2];
1880 uv[1] += uv_dudv[3];
1881 }
1882 }
1883
1884 // Blend 2 rows into 1 for conversions such as I422ToI420.
1885 void HalfRow_C(const uint8* src_uv, int src_uv_stride,
1886 uint8* dst_uv, int pix) {
1887 int x;
1888 for (x = 0; x < pix; ++x) {
1889 dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
1890 }
1891 }
1892
1893 // C version 2x2 -> 2x1.
1894 void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
1895 ptrdiff_t src_stride,
1896 int width, int source_y_fraction) {
1897 int y1_fraction = source_y_fraction;
1898 int y0_fraction = 256 - y1_fraction;
1899 const uint8* src_ptr1 = src_ptr + src_stride;
1900 int x;
1901 if (source_y_fraction == 0) {
1902 memcpy(dst_ptr, src_ptr, width);
1903 return;
1904 }
1905 if (source_y_fraction == 128) {
1906 HalfRow_C(src_ptr, (int)(src_stride), dst_ptr, width);
1907 return;
1908 }
1909 for (x = 0; x < width - 1; x += 2) {
1910 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
1911 dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
1912 src_ptr += 2;
1913 src_ptr1 += 2;
1914 dst_ptr += 2;
1915 }
1916 if (width & 1) {
1917 dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
1918 }
1919 }
1920
1921 // Select 2 channels from ARGB on alternating pixels. e.g. BGBGBGBG
1922 void ARGBToBayerRow_C(const uint8* src_argb,
1923 uint8* dst_bayer, uint32 selector, int pix) {
1924 int index0 = selector & 0xff;
1925 int index1 = (selector >> 8) & 0xff;
1926 // Copy a row of Bayer.
1927 int x;
1928 for (x = 0; x < pix - 1; x += 2) {
1929 dst_bayer[0] = src_argb[index0];
1930 dst_bayer[1] = src_argb[index1];
1931 src_argb += 8;
1932 dst_bayer += 2;
1933 }
1934 if (pix & 1) {
1935 dst_bayer[0] = src_argb[index0];
1936 }
1937 }
1938
1939 // Select G channel from ARGB. e.g. GGGGGGGG
1940 void ARGBToBayerGGRow_C(const uint8* src_argb,
1941 uint8* dst_bayer, uint32 selector, int pix) {
1942 // Copy a row of G.
1943 int x;
1944 for (x = 0; x < pix - 1; x += 2) {
1945 dst_bayer[0] = src_argb[1];
1946 dst_bayer[1] = src_argb[5];
1947 src_argb += 8;
1948 dst_bayer += 2;
1949 }
1950 if (pix & 1) {
1951 dst_bayer[0] = src_argb[1];
1952 }
1953 }
1954
1955 // Use first 4 shuffler values to reorder ARGB channels.
1956 void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
1957 const uint8* shuffler, int pix) {
1958 int index0 = shuffler[0];
1959 int index1 = shuffler[1];
1960 int index2 = shuffler[2];
1961 int index3 = shuffler[3];
1962 // Shuffle a row of ARGB.
1963 int x;
1964 for (x = 0; x < pix; ++x) {
1965 // To support in-place conversion.
1966 uint8 b = src_argb[index0];
1967 uint8 g = src_argb[index1];
1968 uint8 r = src_argb[index2];
1969 uint8 a = src_argb[index3];
1970 dst_argb[0] = b;
1971 dst_argb[1] = g;
1972 dst_argb[2] = r;
1973 dst_argb[3] = a;
1974 src_argb += 4;
1975 dst_argb += 4;
1976 }
1977 }
1978
1979 void I422ToYUY2Row_C(const uint8* src_y,
1980 const uint8* src_u,
1981 const uint8* src_v,
1982 uint8* dst_frame, int width) {
1983 int x;
1984 for (x = 0; x < width - 1; x += 2) {
1985 dst_frame[0] = src_y[0];
1986 dst_frame[1] = src_u[0];
1987 dst_frame[2] = src_y[1];
1988 dst_frame[3] = src_v[0];
1989 dst_frame += 4;
1990 src_y += 2;
1991 src_u += 1;
1992 src_v += 1;
1993 }
1994 if (width & 1) {
1995 dst_frame[0] = src_y[0];
1996 dst_frame[1] = src_u[0];
1997 dst_frame[2] = src_y[0]; // duplicate last y
1998 dst_frame[3] = src_v[0];
1999 }
2000 }
2001
2002 void I422ToUYVYRow_C(const uint8* src_y,
2003 const uint8* src_u,
2004 const uint8* src_v,
2005 uint8* dst_frame, int width) {
2006 int x;
2007 for (x = 0; x < width - 1; x += 2) {
2008 dst_frame[0] = src_u[0];
2009 dst_frame[1] = src_y[0];
2010 dst_frame[2] = src_v[0];
2011 dst_frame[3] = src_y[1];
2012 dst_frame += 4;
2013 src_y += 2;
2014 src_u += 1;
2015 src_v += 1;
2016 }
2017 if (width & 1) {
2018 dst_frame[0] = src_u[0];
2019 dst_frame[1] = src_y[0];
2020 dst_frame[2] = src_v[0];
2021 dst_frame[3] = src_y[0]; // duplicate last y
2022 }
2023 }
2024
2025 #if !defined(LIBYUV_DISABLE_X86) && defined(HAS_I422TOARGBROW_SSSE3)
2026 // row_win.cc has asm version, but GCC uses 2 step wrapper.
2027 #if defined(__x86_64__) || defined(__i386__)
2028 void I422ToRGB565Row_SSSE3(const uint8* src_y,
2029 const uint8* src_u,
2030 const uint8* src_v,
2031 uint8* rgb_buf,
2032 int width) {
2033 // Allocate a row of ARGB.
2034 align_buffer_64(row, width * 4);
2035 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
2036 ARGBToRGB565Row_SSE2(row, rgb_buf, width);
2037 free_aligned_buffer_64(row);
2038 }
2039 #endif // defined(__x86_64__) || defined(__i386__)
2040
2041 #if defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
2042 void I422ToARGB1555Row_SSSE3(const uint8* src_y,
2043 const uint8* src_u,
2044 const uint8* src_v,
2045 uint8* rgb_buf,
2046 int width) {
2047 // Allocate a row of ARGB.
2048 align_buffer_64(row, width * 4);
2049 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
2050 ARGBToARGB1555Row_SSE2(row, rgb_buf, width);
2051 free_aligned_buffer_64(row);
2052 }
2053
2054 void I422ToARGB4444Row_SSSE3(const uint8* src_y,
2055 const uint8* src_u,
2056 const uint8* src_v,
2057 uint8* rgb_buf,
2058 int width) {
2059 // Allocate a row of ARGB.
2060 align_buffer_64(row, width * 4);
2061 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
2062 ARGBToARGB4444Row_SSE2(row, rgb_buf, width);
2063 free_aligned_buffer_64(row);
2064 }
2065
2066 void NV12ToRGB565Row_SSSE3(const uint8* src_y,
2067 const uint8* src_uv,
2068 uint8* dst_rgb565,
2069 int width) {
2070 // Allocate a row of ARGB.
2071 align_buffer_64(row, width * 4);
2072 NV12ToARGBRow_SSSE3(src_y, src_uv, row, width);
2073 ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
2074 free_aligned_buffer_64(row);
2075 }
2076
2077 void NV21ToRGB565Row_SSSE3(const uint8* src_y,
2078 const uint8* src_vu,
2079 uint8* dst_rgb565,
2080 int width) {
2081 // Allocate a row of ARGB.
2082 align_buffer_64(row, width * 4);
2083 NV21ToARGBRow_SSSE3(src_y, src_vu, row, width);
2084 ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
2085 free_aligned_buffer_64(row);
2086 }
2087
2088 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
2089 uint8* dst_argb,
2090 int width) {
2091 // Allocate a rows of yuv.
2092 align_buffer_64(row_y, ((width + 63) & ~63) * 2);
2093 uint8* row_u = row_y + ((width + 63) & ~63);
2094 uint8* row_v = row_u + ((width + 63) & ~63) / 2;
2095 YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, width);
2096 YUY2ToYRow_SSE2(src_yuy2, row_y, width);
2097 I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
2098 free_aligned_buffer_64(row_y);
2099 }
2100
2101 void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
2102 uint8* dst_argb,
2103 int width) {
2104 // Allocate a rows of yuv.
2105 align_buffer_64(row_y, ((width + 63) & ~63) * 2);
2106 uint8* row_u = row_y + ((width + 63) & ~63);
2107 uint8* row_v = row_u + ((width + 63) & ~63) / 2;
2108 YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, row_u, row_v, width);
2109 YUY2ToYRow_Unaligned_SSE2(src_yuy2, row_y, width);
2110 I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
2111 free_aligned_buffer_64(row_y);
2112 }
2113
2114 void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
2115 uint8* dst_argb,
2116 int width) {
2117 // Allocate a rows of yuv.
2118 align_buffer_64(row_y, ((width + 63) & ~63) * 2);
2119 uint8* row_u = row_y + ((width + 63) & ~63);
2120 uint8* row_v = row_u + ((width + 63) & ~63) / 2;
2121 UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, width);
2122 UYVYToYRow_SSE2(src_uyvy, row_y, width);
2123 I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
2124 free_aligned_buffer_64(row_y);
2125 }
2126
2127 void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
2128 uint8* dst_argb,
2129 int width) {
2130 // Allocate a rows of yuv.
2131 align_buffer_64(row_y, ((width + 63) & ~63) * 2);
2132 uint8* row_u = row_y + ((width + 63) & ~63);
2133 uint8* row_v = row_u + ((width + 63) & ~63) / 2;
2134 UYVYToUV422Row_Unaligned_SSE2(src_uyvy, row_u, row_v, width);
2135 UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width);
2136 I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
2137 free_aligned_buffer_64(row_y);
2138 }
2139
2140 #endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
2141 #endif // !defined(LIBYUV_DISABLE_X86)
2142
2143 void ARGBPolynomialRow_C(const uint8* src_argb,
2144 uint8* dst_argb, const float* poly,
2145 int width) {
2146 int i;
2147 for (i = 0; i < width; ++i) {
2148 float b = (float)(src_argb[0]);
2149 float g = (float)(src_argb[1]);
2150 float r = (float)(src_argb[2]);
2151 float a = (float)(src_argb[3]);
2152 float b2 = b * b;
2153 float g2 = g * g;
2154 float r2 = r * r;
2155 float a2 = a * a;
2156 float db = poly[0] + poly[4] * b;
2157 float dg = poly[1] + poly[5] * g;
2158 float dr = poly[2] + poly[6] * r;
2159 float da = poly[3] + poly[7] * a;
2160 float b3 = b2 * b;
2161 float g3 = g2 * g;
2162 float r3 = r2 * r;
2163 float a3 = a2 * a;
2164 db += poly[8] * b2;
2165 dg += poly[9] * g2;
2166 dr += poly[10] * r2;
2167 da += poly[11] * a2;
2168 db += poly[12] * b3;
2169 dg += poly[13] * g3;
2170 dr += poly[14] * r3;
2171 da += poly[15] * a3;
2172
2173 dst_argb[0] = Clamp((int32)(db));
2174 dst_argb[1] = Clamp((int32)(dg));
2175 dst_argb[2] = Clamp((int32)(dr));
2176 dst_argb[3] = Clamp((int32)(da));
2177 src_argb += 4;
2178 dst_argb += 4;
2179 }
2180 }
2181
2182 void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
2183 const uint8* luma, uint32 lumacoeff) {
2184 uint32 bc = lumacoeff & 0xff;
2185 uint32 gc = (lumacoeff >> 8) & 0xff;
2186 uint32 rc = (lumacoeff >> 16) & 0xff;
2187
2188 int i;
2189 for (i = 0; i < width - 1; i += 2) {
2190 // Luminance in rows, color values in columns.
2191 const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
2192 src_argb[2] * rc) & 0x7F00u) + luma;
2193 const uint8* luma1;
2194 dst_argb[0] = luma0[src_argb[0]];
2195 dst_argb[1] = luma0[src_argb[1]];
2196 dst_argb[2] = luma0[src_argb[2]];
2197 dst_argb[3] = src_argb[3];
2198 luma1 = ((src_argb[4] * bc + src_argb[5] * gc +
2199 src_argb[6] * rc) & 0x7F00u) + luma;
2200 dst_argb[4] = luma1[src_argb[4]];
2201 dst_argb[5] = luma1[src_argb[5]];
2202 dst_argb[6] = luma1[src_argb[6]];
2203 dst_argb[7] = src_argb[7];
2204 src_argb += 8;
2205 dst_argb += 8;
2206 }
2207 if (width & 1) {
2208 // Luminance in rows, color values in columns.
2209 const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
2210 src_argb[2] * rc) & 0x7F00u) + luma;
2211 dst_argb[0] = luma0[src_argb[0]];
2212 dst_argb[1] = luma0[src_argb[1]];
2213 dst_argb[2] = luma0[src_argb[2]];
2214 dst_argb[3] = src_argb[3];
2215 }
2216 }
2217
2218 void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
2219 int i;
2220 for (i = 0; i < width - 1; i += 2) {
2221 dst[3] = src[3];
2222 dst[7] = src[7];
2223 dst += 8;
2224 src += 8;
2225 }
2226 if (width & 1) {
2227 dst[3] = src[3];
2228 }
2229 }
2230
2231 void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
2232 int i;
2233 for (i = 0; i < width - 1; i += 2) {
2234 dst[3] = src[0];
2235 dst[7] = src[1];
2236 dst += 8;
2237 src += 2;
2238 }
2239 if (width & 1) {
2240 dst[3] = src[0];
2241 }
2242 }
2243
2244 #ifdef __cplusplus
2245 } // extern "C"
2246 } // namespace libyuv
2247 #endif

mercurial