/*
 * Wed, 31 Dec 2014 06:09:35 +0100
 * Cloned from upstream origin tor-browser at tag tor-browser-31.3.0esr-4.5-1-build1,
 * revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for development purposes.
 */
1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
11 #include "libyuv/planar_functions.h"
13 #include <string.h> // for memset()
15 #include "libyuv/cpu_id.h"
16 #ifdef HAVE_JPEG
17 #include "libyuv/mjpeg_decoder.h"
18 #endif
19 #include "libyuv/row.h"
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
// Copy a plane of data
LIBYUV_API
void CopyPlane(const uint8* src_y, int src_stride_y,
               uint8* dst_y, int dst_stride_y,
               int width, int height) {
  int y;
  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
  // Coalesce rows: when both planes are contiguous (stride == width),
  // copy the whole plane as a single long row.
  if (src_stride_y == width &&
      dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }
  // CPU dispatch: each enabled #if block below may overwrite the previous
  // choice, so when several flags apply, the last matching block wins.
#if defined(HAS_COPYROW_X86)
  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
    CopyRow = CopyRow_X86;
  }
#endif
#if defined(HAS_COPYROW_SSE2)
  // SSE2 path requires width a multiple of 32 and 16-byte aligned
  // pointers and strides on both planes.
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    CopyRow = CopyRow_SSE2;
  }
#endif
#if defined(HAS_COPYROW_ERMS)
  // ERMS (enhanced rep movsb) has no width or alignment requirement.
  if (TestCpuFlag(kCpuHasERMS)) {
    CopyRow = CopyRow_ERMS;
  }
#endif
#if defined(HAS_COPYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
    CopyRow = CopyRow_NEON;
  }
#endif
#if defined(HAS_COPYROW_MIPS)
  if (TestCpuFlag(kCpuHasMIPS)) {
    CopyRow = CopyRow_MIPS;
  }
#endif

  // Copy plane
  for (y = 0; y < height; ++y) {
    CopyRow(src_y, dst_y, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}
76 // Copy I422.
77 LIBYUV_API
78 int I422Copy(const uint8* src_y, int src_stride_y,
79 const uint8* src_u, int src_stride_u,
80 const uint8* src_v, int src_stride_v,
81 uint8* dst_y, int dst_stride_y,
82 uint8* dst_u, int dst_stride_u,
83 uint8* dst_v, int dst_stride_v,
84 int width, int height) {
85 int halfwidth = (width + 1) >> 1;
86 if (!src_y || !src_u || !src_v ||
87 !dst_y || !dst_u || !dst_v ||
88 width <= 0 || height == 0) {
89 return -1;
90 }
91 // Negative height means invert the image.
92 if (height < 0) {
93 height = -height;
94 src_y = src_y + (height - 1) * src_stride_y;
95 src_u = src_u + (height - 1) * src_stride_u;
96 src_v = src_v + (height - 1) * src_stride_v;
97 src_stride_y = -src_stride_y;
98 src_stride_u = -src_stride_u;
99 src_stride_v = -src_stride_v;
100 }
101 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
102 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
103 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
104 return 0;
105 }
107 // Copy I444.
108 LIBYUV_API
109 int I444Copy(const uint8* src_y, int src_stride_y,
110 const uint8* src_u, int src_stride_u,
111 const uint8* src_v, int src_stride_v,
112 uint8* dst_y, int dst_stride_y,
113 uint8* dst_u, int dst_stride_u,
114 uint8* dst_v, int dst_stride_v,
115 int width, int height) {
116 if (!src_y || !src_u || !src_v ||
117 !dst_y || !dst_u || !dst_v ||
118 width <= 0 || height == 0) {
119 return -1;
120 }
121 // Negative height means invert the image.
122 if (height < 0) {
123 height = -height;
124 src_y = src_y + (height - 1) * src_stride_y;
125 src_u = src_u + (height - 1) * src_stride_u;
126 src_v = src_v + (height - 1) * src_stride_v;
127 src_stride_y = -src_stride_y;
128 src_stride_u = -src_stride_u;
129 src_stride_v = -src_stride_v;
130 }
132 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
133 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
134 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
135 return 0;
136 }
138 // Copy I400.
139 LIBYUV_API
140 int I400ToI400(const uint8* src_y, int src_stride_y,
141 uint8* dst_y, int dst_stride_y,
142 int width, int height) {
143 if (!src_y || !dst_y || width <= 0 || height == 0) {
144 return -1;
145 }
146 // Negative height means invert the image.
147 if (height < 0) {
148 height = -height;
149 src_y = src_y + (height - 1) * src_stride_y;
150 src_stride_y = -src_stride_y;
151 }
152 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
153 return 0;
154 }
156 // Convert I420 to I400.
157 LIBYUV_API
158 int I420ToI400(const uint8* src_y, int src_stride_y,
159 const uint8* src_u, int src_stride_u,
160 const uint8* src_v, int src_stride_v,
161 uint8* dst_y, int dst_stride_y,
162 int width, int height) {
163 if (!src_y || !dst_y || width <= 0 || height == 0) {
164 return -1;
165 }
166 // Negative height means invert the image.
167 if (height < 0) {
168 height = -height;
169 src_y = src_y + (height - 1) * src_stride_y;
170 src_stride_y = -src_stride_y;
171 }
172 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
173 return 0;
174 }
// Mirror a plane of data (horizontal flip). Not exported (no LIBYUV_API).
void MirrorPlane(const uint8* src_y, int src_stride_y,
                 uint8* dst_y, int dst_stride_y,
                 int width, int height) {
  int y;
  void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
  // CPU dispatch: later #if blocks override earlier choices, so with
  // several flags set the last eligible implementation wins.
#if defined(HAS_MIRRORROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
    MirrorRow = MirrorRow_NEON;
  }
#endif
#if defined(HAS_MIRRORROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
    MirrorRow = MirrorRow_SSE2;
  }
#endif
#if defined(HAS_MIRRORROW_SSSE3)
  // SSSE3 additionally requires 16-byte aligned pointers and strides.
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    MirrorRow = MirrorRow_SSSE3;
  }
#endif
#if defined(HAS_MIRRORROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
    MirrorRow = MirrorRow_AVX2;
  }
#endif

  // Mirror plane
  for (y = 0; y < height; ++y) {
    MirrorRow(src_y, dst_y, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}
// Convert YUY2 to I422.
LIBYUV_API
int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  int y;
  void (*YUY2ToUV422Row)(const uint8* src_yuy2,
                         uint8* dst_u, uint8* dst_v, int pix) =
      YUY2ToUV422Row_C;
  void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
      YUY2ToYRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
    src_stride_yuy2 = -src_stride_yuy2;
  }
  // Coalesce rows: YUY2 packs 2 bytes per pixel and the U/V outputs are
  // half width, so all four planes must be exactly contiguous to merge.
  if (src_stride_yuy2 == width * 2 &&
      dst_stride_y == width &&
      dst_stride_u * 2 == width &&
      dst_stride_v * 2 == width) {
    width *= height;
    height = 1;
    src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
  }
  // CPU dispatch: _Any_ variants handle any width, _Unaligned_ need a
  // multiple-of-16 width, and the plain variants also need 16-byte
  // aligned pointers/strides.
#if defined(HAS_YUY2TOYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
    YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
    YUY2ToYRow = YUY2ToYRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
      YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
      if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
        YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
          YUY2ToYRow = YUY2ToYRow_SSE2;
        }
      }
    }
  }
#endif
#if defined(HAS_YUY2TOYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
    YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
    YUY2ToYRow = YUY2ToYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
      YUY2ToYRow = YUY2ToYRow_AVX2;
    }
  }
#endif
#if defined(HAS_YUY2TOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    YUY2ToYRow = YUY2ToYRow_Any_NEON;
    if (width >= 16) {
      YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
    }
    if (IS_ALIGNED(width, 16)) {
      YUY2ToYRow = YUY2ToYRow_NEON;
      YUY2ToUV422Row = YUY2ToUV422Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
    YUY2ToYRow(src_yuy2, dst_y, width);
    src_yuy2 += src_stride_yuy2;
    dst_y += dst_stride_y;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  return 0;
}
// Convert UYVY to I422.
LIBYUV_API
int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  int y;
  void (*UYVYToUV422Row)(const uint8* src_uyvy,
                         uint8* dst_u, uint8* dst_v, int pix) =
      UYVYToUV422Row_C;
  void (*UYVYToYRow)(const uint8* src_uyvy,
                     uint8* dst_y, int pix) = UYVYToYRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
    src_stride_uyvy = -src_stride_uyvy;
  }
  // Coalesce rows: UYVY packs 2 bytes per pixel and the U/V outputs are
  // half width, so all four planes must be exactly contiguous to merge.
  if (src_stride_uyvy == width * 2 &&
      dst_stride_y == width &&
      dst_stride_u * 2 == width &&
      dst_stride_v * 2 == width) {
    width *= height;
    height = 1;
    src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
  }
  // CPU dispatch: _Any_ variants handle any width, _Unaligned_ need a
  // multiple-of-16 width, and the plain variants also need 16-byte
  // aligned pointers/strides.
#if defined(HAS_UYVYTOYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
    UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
    UYVYToYRow = UYVYToYRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2;
      UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
      if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
        UYVYToUV422Row = UYVYToUV422Row_SSE2;
        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
          UYVYToYRow = UYVYToYRow_SSE2;
        }
      }
    }
  }
#endif
#if defined(HAS_UYVYTOYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
    UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
    UYVYToYRow = UYVYToYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      UYVYToUV422Row = UYVYToUV422Row_AVX2;
      UYVYToYRow = UYVYToYRow_AVX2;
    }
  }
#endif
#if defined(HAS_UYVYTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    UYVYToYRow = UYVYToYRow_Any_NEON;
    if (width >= 16) {
      UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
    }
    if (IS_ALIGNED(width, 16)) {
      UYVYToYRow = UYVYToYRow_NEON;
      UYVYToUV422Row = UYVYToUV422Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
    UYVYToYRow(src_uyvy, dst_y, width);
    src_uyvy += src_stride_uyvy;
    dst_y += dst_stride_y;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  return 0;
}
375 // Mirror I400 with optional flipping
376 LIBYUV_API
377 int I400Mirror(const uint8* src_y, int src_stride_y,
378 uint8* dst_y, int dst_stride_y,
379 int width, int height) {
380 if (!src_y || !dst_y ||
381 width <= 0 || height == 0) {
382 return -1;
383 }
384 // Negative height means invert the image.
385 if (height < 0) {
386 height = -height;
387 src_y = src_y + (height - 1) * src_stride_y;
388 src_stride_y = -src_stride_y;
389 }
391 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
392 return 0;
393 }
395 // Mirror I420 with optional flipping
396 LIBYUV_API
397 int I420Mirror(const uint8* src_y, int src_stride_y,
398 const uint8* src_u, int src_stride_u,
399 const uint8* src_v, int src_stride_v,
400 uint8* dst_y, int dst_stride_y,
401 uint8* dst_u, int dst_stride_u,
402 uint8* dst_v, int dst_stride_v,
403 int width, int height) {
404 int halfwidth = (width + 1) >> 1;
405 int halfheight = (height + 1) >> 1;
406 if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v ||
407 width <= 0 || height == 0) {
408 return -1;
409 }
410 // Negative height means invert the image.
411 if (height < 0) {
412 height = -height;
413 halfheight = (height + 1) >> 1;
414 src_y = src_y + (height - 1) * src_stride_y;
415 src_u = src_u + (halfheight - 1) * src_stride_u;
416 src_v = src_v + (halfheight - 1) * src_stride_v;
417 src_stride_y = -src_stride_y;
418 src_stride_u = -src_stride_u;
419 src_stride_v = -src_stride_v;
420 }
422 if (dst_y) {
423 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
424 }
425 MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
426 MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
427 return 0;
428 }
// ARGB mirror.
LIBYUV_API
int ARGBMirror(const uint8* src_argb, int src_stride_argb,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  int y;
  void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
      ARGBMirrorRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }

  // CPU dispatch: later #if blocks override earlier choices when both apply.
#if defined(HAS_ARGBMIRRORROW_SSSE3)
  // SSSE3 additionally requires 16-byte aligned pointers and strides.
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBMirrorRow = ARGBMirrorRow_SSSE3;
  }
#endif
#if defined(HAS_ARGBMIRRORROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
    ARGBMirrorRow = ARGBMirrorRow_AVX2;
  }
#endif
#if defined(HAS_ARGBMIRRORROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
    ARGBMirrorRow = ARGBMirrorRow_NEON;
  }
#endif

  // Mirror plane
  for (y = 0; y < height; ++y) {
    ARGBMirrorRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
// Get a blender that optimized for the CPU, alignment and pixel count.
// As there are 6 blenders to choose from, the caller should try to use
// the same blend function for all pixels if possible.
LIBYUV_API
ARGBBlendRow GetARGBBlend() {
  void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
                       uint8* dst_argb, int width) = ARGBBlendRow_C;
#if defined(HAS_ARGBBLENDROW_SSSE3)
  // SSSE3 is preferred: return immediately so the SSE2 block below
  // cannot override it.
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBBlendRow = ARGBBlendRow_SSSE3;
    return ARGBBlendRow;
  }
#endif
#if defined(HAS_ARGBBLENDROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBBlendRow = ARGBBlendRow_SSE2;
  }
#endif
#if defined(HAS_ARGBBLENDROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBBlendRow = ARGBBlendRow_NEON;
  }
#endif
  return ARGBBlendRow;
}
// Alpha Blend 2 ARGB images and store to destination.
LIBYUV_API
int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
              const uint8* src_argb1, int src_stride_argb1,
              uint8* dst_argb, int dst_stride_argb,
              int width, int height) {
  int y;
  // The row blender is chosen once up front by GetARGBBlend().
  void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
                       uint8* dst_argb, int width) = GetARGBBlend();
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: only the destination is
  // walked bottom-up; both sources are still read top-down.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows: ARGB is 4 bytes per pixel.
  if (src_stride_argb0 == width * 4 &&
      src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }

  for (y = 0; y < height; ++y) {
    ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
// Multiply 2 ARGB images and store to destination.
LIBYUV_API
int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
                 const uint8* src_argb1, int src_stride_argb1,
                 uint8* dst_argb, int dst_stride_argb,
                 int width, int height) {
  int y;
  void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
                          int width) = ARGBMultiplyRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: only the destination is
  // walked bottom-up; both sources are still read top-down.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows: ARGB is 4 bytes per pixel.
  if (src_stride_argb0 == width * 4 &&
      src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
  // CPU dispatch: _Any_ variants handle arbitrary widths; exact-multiple
  // widths use the faster full-row variants. Later blocks override.
#if defined(HAS_ARGBMULTIPLYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBMULTIPLYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBMULTIPLYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBMultiplyRow = ARGBMultiplyRow_NEON;
    }
  }
#endif

  // Multiply plane
  for (y = 0; y < height; ++y) {
    ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
// Add 2 ARGB images and store to destination.
LIBYUV_API
int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
            const uint8* src_argb1, int src_stride_argb1,
            uint8* dst_argb, int dst_stride_argb,
            int width, int height) {
  int y;
  void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
                     int width) = ARGBAddRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: only the destination is
  // walked bottom-up; both sources are still read top-down.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows: ARGB is 4 bytes per pixel.
  if (src_stride_argb0 == width * 4 &&
      src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
  // MSVC builds use the full SSE2 row directly with no width gate or
  // _Any_ wrapper; other compilers take the guarded path below.
#if defined(HAS_ARGBADDROW_SSE2) && defined(_MSC_VER)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBAddRow = ARGBAddRow_SSE2;
  }
#endif
#if defined(HAS_ARGBADDROW_SSE2) && !defined(_MSC_VER)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
    ARGBAddRow = ARGBAddRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBAddRow = ARGBAddRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBADDROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
    ARGBAddRow = ARGBAddRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBAddRow = ARGBAddRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBADDROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ARGBAddRow = ARGBAddRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBAddRow = ARGBAddRow_NEON;
    }
  }
#endif

  // Add plane
  for (y = 0; y < height; ++y) {
    ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
// Subtract 2 ARGB images and store to destination.
LIBYUV_API
int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
                 const uint8* src_argb1, int src_stride_argb1,
                 uint8* dst_argb, int dst_stride_argb,
                 int width, int height) {
  int y;
  void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst,
                          int width) = ARGBSubtractRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: only the destination is
  // walked bottom-up; both sources are still read top-down.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows: ARGB is 4 bytes per pixel.
  if (src_stride_argb0 == width * 4 &&
      src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
  // CPU dispatch: _Any_ variants handle arbitrary widths; exact-multiple
  // widths use the full-row variants. Later blocks override.
#if defined(HAS_ARGBSUBTRACTROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
    ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBSubtractRow = ARGBSubtractRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBSUBTRACTROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
    ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBSubtractRow = ARGBSubtractRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBSUBTRACTROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBSubtractRow = ARGBSubtractRow_NEON;
    }
  }
#endif

  // Subtract plane
  for (y = 0; y < height; ++y) {
    ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
// Convert I422 to BGRA.
LIBYUV_API
int I422ToBGRA(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_bgra, int dst_stride_bgra,
               int width, int height) {
  int y;
  void (*I422ToBGRARow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I422ToBGRARow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_bgra ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: the destination is written
  // bottom-up while the sources are read top-down.
  if (height < 0) {
    height = -height;
    dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
    dst_stride_bgra = -dst_stride_bgra;
  }
  // Coalesce rows: I422 chroma is half width; BGRA is 4 bytes per pixel.
  if (src_stride_y == width &&
      src_stride_u * 2 == width &&
      src_stride_v * 2 == width &&
      dst_stride_bgra == width * 4) {
    width *= height;
    height = 1;
    src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0;
  }
  // CPU dispatch: an #elif chain, so only the first HAS_ macro defined at
  // compile time is considered at all.
#if defined(HAS_I422TOBGRAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToBGRARow = I422ToBGRARow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      I422ToBGRARow = I422ToBGRARow_NEON;
    }
  }
#elif defined(HAS_I422TOBGRAROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
        I422ToBGRARow = I422ToBGRARow_SSSE3;
      }
    }
  }
#elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
  // MIPS DSPR2 requires word/halfword alignment on all planes.
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
    I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
  }
#endif

  for (y = 0; y < height; ++y) {
    I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
    dst_bgra += dst_stride_bgra;
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}
// Convert I422 to ABGR.
LIBYUV_API
int I422ToABGR(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_abgr, int dst_stride_abgr,
               int width, int height) {
  int y;
  void (*I422ToABGRRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I422ToABGRRow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_abgr ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: the destination is written
  // bottom-up while the sources are read top-down.
  if (height < 0) {
    height = -height;
    dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
    dst_stride_abgr = -dst_stride_abgr;
  }
  // Coalesce rows: I422 chroma is half width; ABGR is 4 bytes per pixel.
  if (src_stride_y == width &&
      src_stride_u * 2 == width &&
      src_stride_v * 2 == width &&
      dst_stride_abgr == width * 4) {
    width *= height;
    height = 1;
    src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
  }
  // CPU dispatch: an #elif chain, so only the first HAS_ macro defined at
  // compile time is considered at all.
#if defined(HAS_I422TOABGRROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToABGRRow = I422ToABGRRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      I422ToABGRRow = I422ToABGRRow_NEON;
    }
  }
#elif defined(HAS_I422TOABGRROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
        I422ToABGRRow = I422ToABGRRow_SSSE3;
      }
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
    dst_abgr += dst_stride_abgr;
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}
// Convert I422 to RGBA.
LIBYUV_API
int I422ToRGBA(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_rgba, int dst_stride_rgba,
               int width, int height) {
  int y;
  void (*I422ToRGBARow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I422ToRGBARow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_rgba ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: the destination is written
  // bottom-up while the sources are read top-down.
  if (height < 0) {
    height = -height;
    dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
    dst_stride_rgba = -dst_stride_rgba;
  }
  // Coalesce rows: I422 chroma is half width; RGBA is 4 bytes per pixel.
  if (src_stride_y == width &&
      src_stride_u * 2 == width &&
      src_stride_v * 2 == width &&
      dst_stride_rgba == width * 4) {
    width *= height;
    height = 1;
    src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0;
  }
  // CPU dispatch: an #elif chain, so only the first HAS_ macro defined at
  // compile time is considered at all.
#if defined(HAS_I422TORGBAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToRGBARow = I422ToRGBARow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      I422ToRGBARow = I422ToRGBARow_NEON;
    }
  }
#elif defined(HAS_I422TORGBAROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
        I422ToRGBARow = I422ToRGBARow_SSSE3;
      }
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
    dst_rgba += dst_stride_rgba;
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}
// Convert NV12 to RGB565.
LIBYUV_API
int NV12ToRGB565(const uint8* src_y, int src_stride_y,
                 const uint8* src_uv, int src_stride_uv,
                 uint8* dst_rgb565, int dst_stride_rgb565,
                 int width, int height) {
  int y;
  void (*NV12ToRGB565Row)(const uint8* y_buf,
                          const uint8* uv_buf,
                          uint8* rgb_buf,
                          int width) = NV12ToRGB565Row_C;
  if (!src_y || !src_uv || !dst_rgb565 ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: the destination is written
  // bottom-up while the sources are read top-down.
  if (height < 0) {
    height = -height;
    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
    dst_stride_rgb565 = -dst_stride_rgb565;
  }
  // CPU dispatch: _Any_ variants handle any width >= 8; multiple-of-8
  // widths use the full-row variants.
#if defined(HAS_NV12TORGB565ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
    }
  }
#elif defined(HAS_NV12TORGB565ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      NV12ToRGB565Row = NV12ToRGB565Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width);
    dst_rgb565 += dst_stride_rgb565;
    src_y += src_stride_y;
    // The interleaved UV plane is shared by two luma rows, so advance it
    // only after every odd row.
    if (y & 1) {
      src_uv += src_stride_uv;
    }
  }
  return 0;
}
// Convert NV21 to RGB565.
LIBYUV_API
int NV21ToRGB565(const uint8* src_y, int src_stride_y,
                 const uint8* src_vu, int src_stride_vu,
                 uint8* dst_rgb565, int dst_stride_rgb565,
                 int width, int height) {
  int y;
  void (*NV21ToRGB565Row)(const uint8* y_buf,
                          const uint8* src_vu,
                          uint8* rgb_buf,
                          int width) = NV21ToRGB565Row_C;
  if (!src_y || !src_vu || !dst_rgb565 ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: the destination is written
  // bottom-up while the sources are read top-down.
  if (height < 0) {
    height = -height;
    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
    dst_stride_rgb565 = -dst_stride_rgb565;
  }
  // CPU dispatch: _Any_ variants handle any width >= 8; multiple-of-8
  // widths use the full-row variants.
#if defined(HAS_NV21TORGB565ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      NV21ToRGB565Row = NV21ToRGB565Row_SSSE3;
    }
  }
#elif defined(HAS_NV21TORGB565ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      NV21ToRGB565Row = NV21ToRGB565Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width);
    dst_rgb565 += dst_stride_rgb565;
    src_y += src_stride_y;
    // The interleaved VU plane is shared by two luma rows, so advance it
    // only after every odd row.
    if (y & 1) {
      src_vu += src_stride_vu;
    }
  }
  return 0;
}
// Fill a plane of data with a constant value.
LIBYUV_API
void SetPlane(uint8* dst_y, int dst_stride_y,
              int width, int height,
              uint32 value) {
  int y;
  // Replicate the low byte of value into all four lanes of a 32-bit word
  // for the word-oriented row setters.
  uint32 v32 = value | (value << 8) | (value << 16) | (value << 24);
  void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C;
  // Coalesce rows.
  if (dst_stride_y == width) {
    width *= height;
    height = 1;
    dst_stride_y = 0;
  }
  // CPU dispatch: the X86 block can override the NEON choice when both
  // are compiled in and eligible.
#if defined(HAS_SETROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) &&
      IS_ALIGNED(width, 16) &&
      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    SetRow = SetRow_NEON;
  }
#endif
#if defined(HAS_SETROW_X86)
  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
    SetRow = SetRow_X86;
  }
#endif

  // Set plane
  for (y = 0; y < height; ++y) {
    SetRow(dst_y, v32, width);
    dst_y += dst_stride_y;
  }
}
1048 // Draw a rectangle into I420
1049 LIBYUV_API
1050 int I420Rect(uint8* dst_y, int dst_stride_y,
1051 uint8* dst_u, int dst_stride_u,
1052 uint8* dst_v, int dst_stride_v,
1053 int x, int y,
1054 int width, int height,
1055 int value_y, int value_u, int value_v) {
1056 int halfwidth = (width + 1) >> 1;
1057 int halfheight = (height + 1) >> 1;
1058 uint8* start_y = dst_y + y * dst_stride_y + x;
1059 uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
1060 uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
1061 if (!dst_y || !dst_u || !dst_v ||
1062 width <= 0 || height <= 0 ||
1063 x < 0 || y < 0 ||
1064 value_y < 0 || value_y > 255 ||
1065 value_u < 0 || value_u > 255 ||
1066 value_v < 0 || value_v > 255) {
1067 return -1;
1068 }
1070 SetPlane(start_y, dst_stride_y, width, height, value_y);
1071 SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
1072 SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
1073 return 0;
1074 }
// Draw a solid rectangle into an ARGB image.
// (dst_x, dst_y) is the top-left corner; |value| is a packed 32 bit ARGB
// pixel. Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBRect(uint8* dst_argb, int dst_stride_argb,
             int dst_x, int dst_y,
             int width, int height,
             uint32 value) {
  if (!dst_argb ||
      width <= 0 || height <= 0 ||
      dst_x < 0 || dst_y < 0) {
    return -1;
  }
  // Step to the top-left pixel of the rectangle (4 bytes per ARGB pixel).
  dst_argb += dst_y * dst_stride_argb + dst_x * 4;
  // Coalesce rows: a contiguous region can be filled as one long row.
  if (dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    dst_stride_argb = 0;
  }
  // Each accelerated path fills the whole rectangle and returns directly.
#if defined(HAS_SETROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBSetRows_NEON(dst_argb, value, width, dst_stride_argb, height);
    return 0;
  }
#endif
#if defined(HAS_SETROW_X86)
  if (TestCpuFlag(kCpuHasX86)) {
    ARGBSetRows_X86(dst_argb, value, width, dst_stride_argb, height);
    return 0;
  }
#endif
  ARGBSetRows_C(dst_argb, value, width, dst_stride_argb, height);
  return 0;
}
// Convert unattenuated ARGB to preattenuated ARGB.
// An unattenuated ARGB alpha blend uses the formula
// p = a * f + (1 - a) * b
// where
//   p is output pixel
//   f is foreground pixel
//   b is background pixel
//   a is alpha value from foreground pixel
// A preattenuated ARGB alpha blend uses the formula
// p = f + (1 - a) * b
// where
//   f is foreground pixel premultiplied by alpha
// Returns 0 on success, -1 on invalid arguments. Negative height flips the
// image vertically.
LIBYUV_API
int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
                  uint8* dst_argb, int dst_stride_argb,
                  int width, int height) {
  int y;
  void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
                           int width) = ARGBAttenuateRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: contiguous images can be processed as one long row.
  if (src_stride_argb == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
  // Pick the fastest row function this CPU supports; a later check
  // overrides an earlier one, so the preferred implementation comes last.
#if defined(HAS_ARGBATTENUATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBAttenuateRow = ARGBAttenuateRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 4)) {
      ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBAttenuateRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
// Convert preattenuated ARGB to unattenuated ARGB.
// Inverse of ARGBAttenuate: divides color channels by alpha.
// Returns 0 on success, -1 on invalid arguments. Negative height flips the
// image vertically.
LIBYUV_API
int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
                    uint8* dst_argb, int dst_stride_argb,
                    int width, int height) {
  int y;
  void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
                             int width) = ARGBUnattenuateRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: contiguous images can be processed as one long row.
  if (src_stride_argb == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
  // Pick the fastest row function this CPU supports; a later check
  // overrides an earlier one.
#if defined(HAS_ARGBUNATTENUATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
    ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBUNATTENUATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
    ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
    }
  }
#endif
  // TODO(fbarchard): Neon version.

  for (y = 0; y < height; ++y) {
    ARGBUnattenuateRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
// Convert ARGB to Grayed ARGB (gray values written to all color channels,
// alpha preserved by the row functions).
// Returns 0 on success, -1 on invalid arguments. Negative height flips the
// image vertically.
LIBYUV_API
int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  int y;
  void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
                      int width) = ARGBGrayRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: contiguous images can be processed as one long row.
  if (src_stride_argb == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
  // SSSE3 and NEON are mutually exclusive builds, hence #elif.
#if defined(HAS_ARGBGRAYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBGrayRow = ARGBGrayRow_SSSE3;
  }
#elif defined(HAS_ARGBGRAYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_NEON;
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBGrayRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1281 // Make a rectangle of ARGB gray scale.
1282 LIBYUV_API
1283 int ARGBGray(uint8* dst_argb, int dst_stride_argb,
1284 int dst_x, int dst_y,
1285 int width, int height) {
1286 int y;
1287 void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
1288 int width) = ARGBGrayRow_C;
1289 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1290 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
1291 return -1;
1292 }
1293 // Coalesce rows.
1294 if (dst_stride_argb == width * 4) {
1295 width *= height;
1296 height = 1;
1297 dst_stride_argb = 0;
1298 }
1299 #if defined(HAS_ARGBGRAYROW_SSSE3)
1300 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
1301 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1302 ARGBGrayRow = ARGBGrayRow_SSSE3;
1303 }
1304 #elif defined(HAS_ARGBGRAYROW_NEON)
1305 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1306 ARGBGrayRow = ARGBGrayRow_NEON;
1307 }
1308 #endif
1309 for (y = 0; y < height; ++y) {
1310 ARGBGrayRow(dst, dst, width);
1311 dst += dst_stride_argb;
1312 }
1313 return 0;
1314 }
1316 // Make a rectangle of ARGB Sepia tone.
1317 LIBYUV_API
1318 int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
1319 int dst_x, int dst_y, int width, int height) {
1320 int y;
1321 void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C;
1322 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1323 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
1324 return -1;
1325 }
1326 // Coalesce rows.
1327 if (dst_stride_argb == width * 4) {
1328 width *= height;
1329 height = 1;
1330 dst_stride_argb = 0;
1331 }
1332 #if defined(HAS_ARGBSEPIAROW_SSSE3)
1333 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
1334 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1335 ARGBSepiaRow = ARGBSepiaRow_SSSE3;
1336 }
1337 #elif defined(HAS_ARGBSEPIAROW_NEON)
1338 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1339 ARGBSepiaRow = ARGBSepiaRow_NEON;
1340 }
1341 #endif
1342 for (y = 0; y < height; ++y) {
1343 ARGBSepiaRow(dst, width);
1344 dst += dst_stride_argb;
1345 }
1346 return 0;
1347 }
// Apply a 4x4 matrix to each ARGB pixel.
// Note: Normally for shading, but can be used to swizzle or invert.
// matrix_argb is 16 signed 6 bit fixed point coefficients (row functions
// interpret the scaling; see ARGBColorMatrixRow_C).
// Returns 0 on success, -1 on invalid arguments. Negative height flips the
// image vertically.
LIBYUV_API
int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
                    uint8* dst_argb, int dst_stride_argb,
                    const int8* matrix_argb,
                    int width, int height) {
  int y;
  void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb,
      const int8* matrix_argb, int width) = ARGBColorMatrixRow_C;
  if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: contiguous images can be processed as one long row.
  if (src_stride_argb == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
  // SSSE3 and NEON are mutually exclusive builds, hence #elif.
#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
  }
#elif defined(HAS_ARGBCOLORMATRIXROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
  }
#endif
  for (y = 0; y < height; ++y) {
    ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1392 // Apply a 4x3 matrix to each ARGB pixel.
1393 // Deprecated.
1394 LIBYUV_API
1395 int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
1396 const int8* matrix_rgb,
1397 int dst_x, int dst_y, int width, int height) {
1398 SIMD_ALIGNED(int8 matrix_argb[16]);
1399 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1400 if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 ||
1401 dst_x < 0 || dst_y < 0) {
1402 return -1;
1403 }
1405 // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
1406 matrix_argb[0] = matrix_rgb[0] / 2;
1407 matrix_argb[1] = matrix_rgb[1] / 2;
1408 matrix_argb[2] = matrix_rgb[2] / 2;
1409 matrix_argb[3] = matrix_rgb[3] / 2;
1410 matrix_argb[4] = matrix_rgb[4] / 2;
1411 matrix_argb[5] = matrix_rgb[5] / 2;
1412 matrix_argb[6] = matrix_rgb[6] / 2;
1413 matrix_argb[7] = matrix_rgb[7] / 2;
1414 matrix_argb[8] = matrix_rgb[8] / 2;
1415 matrix_argb[9] = matrix_rgb[9] / 2;
1416 matrix_argb[10] = matrix_rgb[10] / 2;
1417 matrix_argb[11] = matrix_rgb[11] / 2;
1418 matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
1419 matrix_argb[15] = 64; // 1.0
1421 return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb,
1422 dst, dst_stride_argb,
1423 &matrix_argb[0], width, height);
1424 }
1426 // Apply a color table each ARGB pixel.
1427 // Table contains 256 ARGB values.
1428 LIBYUV_API
1429 int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
1430 const uint8* table_argb,
1431 int dst_x, int dst_y, int width, int height) {
1432 int y;
1433 void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
1434 int width) = ARGBColorTableRow_C;
1435 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1436 if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
1437 dst_x < 0 || dst_y < 0) {
1438 return -1;
1439 }
1440 // Coalesce rows.
1441 if (dst_stride_argb == width * 4) {
1442 width *= height;
1443 height = 1;
1444 dst_stride_argb = 0;
1445 }
1446 #if defined(HAS_ARGBCOLORTABLEROW_X86)
1447 if (TestCpuFlag(kCpuHasX86)) {
1448 ARGBColorTableRow = ARGBColorTableRow_X86;
1449 }
1450 #endif
1451 for (y = 0; y < height; ++y) {
1452 ARGBColorTableRow(dst, table_argb, width);
1453 dst += dst_stride_argb;
1454 }
1455 return 0;
1456 }
1458 // Apply a color table each ARGB pixel but preserve destination alpha.
1459 // Table contains 256 ARGB values.
1460 LIBYUV_API
1461 int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
1462 const uint8* table_argb,
1463 int dst_x, int dst_y, int width, int height) {
1464 int y;
1465 void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
1466 int width) = RGBColorTableRow_C;
1467 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1468 if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
1469 dst_x < 0 || dst_y < 0) {
1470 return -1;
1471 }
1472 // Coalesce rows.
1473 if (dst_stride_argb == width * 4) {
1474 width *= height;
1475 height = 1;
1476 dst_stride_argb = 0;
1477 }
1478 #if defined(HAS_RGBCOLORTABLEROW_X86)
1479 if (TestCpuFlag(kCpuHasX86)) {
1480 RGBColorTableRow = RGBColorTableRow_X86;
1481 }
1482 #endif
1483 for (y = 0; y < height; ++y) {
1484 RGBColorTableRow(dst, table_argb, width);
1485 dst += dst_stride_argb;
1486 }
1487 return 0;
1488 }
1490 // ARGBQuantize is used to posterize art.
1491 // e.g. rgb / qvalue * qvalue + qvalue / 2
1492 // But the low levels implement efficiently with 3 parameters, and could be
1493 // used for other high level operations.
1494 // dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
1495 // where scale is 1 / interval_size as a fixed point value.
1496 // The divide is replaces with a multiply by reciprocal fixed point multiply.
1497 // Caveat - although SSE2 saturates, the C function does not and should be used
1498 // with care if doing anything but quantization.
1499 LIBYUV_API
1500 int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
1501 int scale, int interval_size, int interval_offset,
1502 int dst_x, int dst_y, int width, int height) {
1503 int y;
1504 void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size,
1505 int interval_offset, int width) = ARGBQuantizeRow_C;
1506 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1507 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
1508 interval_size < 1 || interval_size > 255) {
1509 return -1;
1510 }
1511 // Coalesce rows.
1512 if (dst_stride_argb == width * 4) {
1513 width *= height;
1514 height = 1;
1515 dst_stride_argb = 0;
1516 }
1517 #if defined(HAS_ARGBQUANTIZEROW_SSE2)
1518 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
1519 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1520 ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
1521 }
1522 #elif defined(HAS_ARGBQUANTIZEROW_NEON)
1523 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1524 ARGBQuantizeRow = ARGBQuantizeRow_NEON;
1525 }
1526 #endif
1527 for (y = 0; y < height; ++y) {
1528 ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
1529 dst += dst_stride_argb;
1530 }
1531 return 0;
1532 }
// Computes table of cumulative sum for image where the value is the sum
// of all values above and to the left of the entry. Used by ARGBBlur.
// dst_cumsum holds 4 int32 per pixel (one per channel);
// dst_stride32_cumsum is in int32 units, not bytes.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
                             int32* dst_cumsum, int dst_stride32_cumsum,
                             int width, int height) {
  int y;
  void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
      const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
  int32* previous_cumsum = dst_cumsum;
  if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
    return -1;
  }
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
  }
#endif
  // Zero the first output row; it doubles as the "previous" row for the
  // first iteration below.
  memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4);  // 4 int per pixel.
  for (y = 0; y < height; ++y) {
    ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
    previous_cumsum = dst_cumsum;
    dst_cumsum += dst_stride32_cumsum;
    src_argb += src_stride_argb;
  }
  return 0;
}
// Blur ARGB image using a box filter of the given radius.
// Caller should allocate CumulativeSum table of width * height * 16 bytes
// aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
// as the buffer is treated as circular.
// Returns 0 on success, -1 on invalid arguments or a radius that clamps
// to zero. Negative height flips the image vertically.
LIBYUV_API
int ARGBBlur(const uint8* src_argb, int src_stride_argb,
             uint8* dst_argb, int dst_stride_argb,
             int32* dst_cumsum, int dst_stride32_cumsum,
             int width, int height, int radius) {
  int y;
  void (*ComputeCumulativeSumRow)(const uint8 *row, int32 *cumsum,
      const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
  void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
      int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C;
  int32* cumsum_bot_row;
  int32* max_cumsum_bot_row;
  int32* cumsum_top_row;

  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Clamp the radius to the image size; reject a degenerate radius.
  if (radius > height) {
    radius = height;
  }
  if (radius > (width / 2 - 1)) {
    radius = width / 2 - 1;
  }
  if (radius <= 0) {
    return -1;
  }
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
    CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
  }
#endif
  // Compute enough CumulativeSum for first row to be blurred. After this
  // one row of CumulativeSum is updated at a time.
  ARGBComputeCumulativeSum(src_argb, src_stride_argb,
                           dst_cumsum, dst_stride32_cumsum,
                           width, radius);

  src_argb = src_argb + radius * src_stride_argb;
  cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];

  // One past the last usable cumsum row; pointers wrap back to dst_cumsum.
  max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
  cumsum_top_row = &dst_cumsum[0];

  for (y = 0; y < height; ++y) {
    // Box rows clamped to the image; area is the pixel count of the box.
    int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
    int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
    int area = radius * (bot_y - top_y);
    int boxwidth = radius * 4;  // Box width in int32s (4 per pixel).
    int x;
    int n;

    // Increment cumsum_top_row pointer with circular buffer wrap around.
    if (top_y) {
      cumsum_top_row += dst_stride32_cumsum;
      if (cumsum_top_row >= max_cumsum_bot_row) {
        cumsum_top_row = dst_cumsum;
      }
    }
    // Increment cumsum_bot_row pointer with circular buffer wrap around and
    // then fill in a row of CumulativeSum.
    if ((y + radius) < height) {
      const int32* prev_cumsum_bot_row = cumsum_bot_row;
      cumsum_bot_row += dst_stride32_cumsum;
      if (cumsum_bot_row >= max_cumsum_bot_row) {
        cumsum_bot_row = dst_cumsum;
      }
      ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
                              width);
      src_argb += src_stride_argb;
    }

    // Left clipped: the box grows until it reaches full width.
    for (x = 0; x < radius + 1; ++x) {
      CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
                                boxwidth, area, &dst_argb[x * 4], 1);
      area += (bot_y - top_y);
      boxwidth += 4;
    }

    // Middle unclipped: full-width box for the interior pixels.
    n = (width - 1) - radius - x + 1;
    CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
                              boxwidth, area, &dst_argb[x * 4], n);

    // Right clipped: the box shrinks toward the right edge.
    for (x += n; x <= width - 1; ++x) {
      area -= (bot_y - top_y);
      boxwidth -= 4;
      CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
                                cumsum_bot_row + (x - radius - 1) * 4,
                                boxwidth, area, &dst_argb[x * 4], 1);
    }
    dst_argb += dst_stride_argb;
  }
  return 0;
}
// Multiply ARGB image by a specified ARGB value.
// |value| is a packed ARGB multiplier; value == 0 is rejected as invalid.
// Returns 0 on success, -1 on invalid arguments. Negative height flips the
// image vertically.
LIBYUV_API
int ARGBShade(const uint8* src_argb, int src_stride_argb,
              uint8* dst_argb, int dst_stride_argb,
              int width, int height, uint32 value) {
  int y;
  void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
                       int width, uint32 value) = ARGBShadeRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: contiguous images can be processed as one long row.
  if (src_stride_argb == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
  // SSE2 and NEON are mutually exclusive builds, hence #elif.
#if defined(HAS_ARGBSHADEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBShadeRow = ARGBShadeRow_SSE2;
  }
#elif defined(HAS_ARGBSHADEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBShadeRow = ARGBShadeRow_NEON;
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBShadeRow(src_argb, dst_argb, width, value);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1712 // Interpolate 2 ARGB images by specified amount (0 to 255).
1713 LIBYUV_API
1714 int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
1715 const uint8* src_argb1, int src_stride_argb1,
1716 uint8* dst_argb, int dst_stride_argb,
1717 int width, int height, int interpolation) {
1718 int y;
1719 void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
1720 ptrdiff_t src_stride, int dst_width,
1721 int source_y_fraction) = InterpolateRow_C;
1722 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
1723 return -1;
1724 }
1725 // Negative height means invert the image.
1726 if (height < 0) {
1727 height = -height;
1728 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
1729 dst_stride_argb = -dst_stride_argb;
1730 }
1731 // Coalesce rows.
1732 if (src_stride_argb0 == width * 4 &&
1733 src_stride_argb1 == width * 4 &&
1734 dst_stride_argb == width * 4) {
1735 width *= height;
1736 height = 1;
1737 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
1738 }
1739 #if defined(HAS_INTERPOLATEROW_SSE2)
1740 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
1741 InterpolateRow = InterpolateRow_Any_SSE2;
1742 if (IS_ALIGNED(width, 4)) {
1743 InterpolateRow = InterpolateRow_Unaligned_SSE2;
1744 if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
1745 IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
1746 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1747 InterpolateRow = InterpolateRow_SSE2;
1748 }
1749 }
1750 }
1751 #endif
1752 #if defined(HAS_INTERPOLATEROW_SSSE3)
1753 if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
1754 InterpolateRow = InterpolateRow_Any_SSSE3;
1755 if (IS_ALIGNED(width, 4)) {
1756 InterpolateRow = InterpolateRow_Unaligned_SSSE3;
1757 if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
1758 IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
1759 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1760 InterpolateRow = InterpolateRow_SSSE3;
1761 }
1762 }
1763 }
1764 #endif
1765 #if defined(HAS_INTERPOLATEROW_AVX2)
1766 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
1767 InterpolateRow = InterpolateRow_Any_AVX2;
1768 if (IS_ALIGNED(width, 8)) {
1769 InterpolateRow = InterpolateRow_AVX2;
1770 }
1771 }
1772 #endif
1773 #if defined(HAS_INTERPOLATEROW_NEON)
1774 if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
1775 InterpolateRow = InterpolateRow_Any_NEON;
1776 if (IS_ALIGNED(width, 4)) {
1777 InterpolateRow = InterpolateRow_NEON;
1778 }
1779 }
1780 #endif
1781 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
1782 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && width >= 1 &&
1783 IS_ALIGNED(src_argb0, 4) && IS_ALIGNED(src_stride_argb0, 4) &&
1784 IS_ALIGNED(src_argb1, 4) && IS_ALIGNED(src_stride_argb1, 4) &&
1785 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
1786 ScaleARGBFilterRows = InterpolateRow_MIPS_DSPR2;
1787 }
1788 #endif
1790 for (y = 0; y < height; ++y) {
1791 InterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0,
1792 width * 4, interpolation);
1793 src_argb0 += src_stride_argb0;
1794 src_argb1 += src_stride_argb1;
1795 dst_argb += dst_stride_argb;
1796 }
1797 return 0;
1798 }
// Shuffle ARGB channel order. e.g. BGRA to ARGB.
// |shuffler| is a 4 byte reorder pattern applied to every pixel by the row
// functions. Returns 0 on success, -1 on invalid arguments. Negative height
// flips the image vertically.
LIBYUV_API
int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
                uint8* dst_argb, int dst_stride_argb,
                const uint8* shuffler, int width, int height) {
  int y;
  void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
                         const uint8* shuffler, int pix) = ARGBShuffleRow_C;
  if (!src_bgra || !dst_argb ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_bgra = src_bgra + (height - 1) * src_stride_bgra;
    src_stride_bgra = -src_stride_bgra;
  }
  // Coalesce rows: contiguous images can be processed as one long row.
  if (src_stride_bgra == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_bgra = dst_stride_argb = 0;
  }
  // Pick the fastest row function this CPU supports; a later check
  // overrides an earlier one.
#if defined(HAS_ARGBSHUFFLEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
    ARGBShuffleRow = ARGBShuffleRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBShuffleRow = ARGBShuffleRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      ARGBShuffleRow = ARGBShuffleRow_Unaligned_SSSE3;
      if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16) &&
          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        ARGBShuffleRow = ARGBShuffleRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
    ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      ARGBShuffleRow = ARGBShuffleRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
    ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
    if (IS_ALIGNED(width, 4)) {
      ARGBShuffleRow = ARGBShuffleRow_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
    src_bgra += src_stride_bgra;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
// Shared implementation for the Sobel effects: extracts a luma plane (G
// channel) from ARGB, computes Sobel X and Y per row, then lets the caller's
// SobelRow combine them into the destination.
// Returns 0 on success, -1 on invalid arguments. Negative height flips the
// image vertically.
static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
                        uint8* dst_argb, int dst_stride_argb,
                        int width, int height,
                        void (*SobelRow)(const uint8* src_sobelx,
                                         const uint8* src_sobely,
                                         uint8* dst, int width)) {
  int y;
  void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
                         uint32 selector, int pix) = ARGBToBayerGGRow_C;
  void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
                    uint8* dst_sobely, int width) = SobelYRow_C;
  void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
                    const uint8* src_y2, uint8* dst_sobely, int width) =
      SobelXRow_C;
  const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // ARGBToBayer used to select G channel from ARGB.
#if defined(HAS_ARGBTOBAYERGGROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
    ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      ARGBToBayerRow = ARGBToBayerGGRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
    ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      ARGBToBayerRow = ARGBToBayerRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBTOBAYERGGROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBToBayerRow = ARGBToBayerGGRow_NEON;
    }
  }
#endif
#if defined(HAS_SOBELYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelYRow = SobelYRow_SSE2;
  }
#endif
#if defined(HAS_SOBELYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelYRow = SobelYRow_NEON;
  }
#endif
#if defined(HAS_SOBELXROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelXRow = SobelXRow_SSE2;
  }
#endif
#if defined(HAS_SOBELXROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelXRow = SobelXRow_NEON;
  }
#endif
  {
    // 3 rows with edges before/after.
    const int kRowSize = (width + kEdge + 15) & ~15;  // 16-byte aligned rows.
    align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
    uint8* row_sobelx = rows;
    uint8* row_sobely = rows + kRowSize;
    uint8* row_y = rows + kRowSize * 2;

    // Convert first row.
    uint8* row_y0 = row_y + kEdge;
    uint8* row_y1 = row_y0 + kRowSize;
    uint8* row_y2 = row_y1 + kRowSize;
    // 0x0d090501 selects one byte per pixel (the G channel, per the
    // ARGBToBayer comment above).
    ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width);
    row_y0[-1] = row_y0[0];  // Extrude left edge.
    memset(row_y0 + width, row_y0[width - 1], 16);  // Extrude 16 for valgrind.
    ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width);
    row_y1[-1] = row_y1[0];
    memset(row_y1 + width, row_y1[width - 1], 16);
    memset(row_y2 + width, 0, 16);

    for (y = 0; y < height; ++y) {
      // Convert next row of ARGB to Y. Last row repeats the previous one.
      if (y < (height - 1)) {
        src_argb += src_stride_argb;
      }
      ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width);
      row_y2[-1] = row_y2[0];
      row_y2[width] = row_y2[width - 1];

      SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
      SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
      SobelRow(row_sobelx, row_sobely, dst_argb, width);

      // Cycle thru circular queue of 3 row_y buffers.
      {
        uint8* row_yt = row_y0;
        row_y0 = row_y1;
        row_y1 = row_y2;
        row_y2 = row_yt;
      }

      dst_argb += dst_stride_argb;
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}
1990 // Sobel ARGB effect.
1991 LIBYUV_API
1992 int ARGBSobel(const uint8* src_argb, int src_stride_argb,
1993 uint8* dst_argb, int dst_stride_argb,
1994 int width, int height) {
1995 void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
1996 uint8* dst_argb, int width) = SobelRow_C;
1997 #if defined(HAS_SOBELROW_SSE2)
1998 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
1999 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
2000 SobelRow = SobelRow_SSE2;
2001 }
2002 #endif
2003 #if defined(HAS_SOBELROW_NEON)
2004 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2005 SobelRow = SobelRow_NEON;
2006 }
2007 #endif
2008 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
2009 width, height, SobelRow);
2010 }
2012 // Sobel ARGB effect with planar output.
2013 LIBYUV_API
2014 int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
2015 uint8* dst_y, int dst_stride_y,
2016 int width, int height) {
2017 void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
2018 uint8* dst_, int width) = SobelToPlaneRow_C;
2019 #if defined(HAS_SOBELTOPLANEROW_SSE2)
2020 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
2021 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
2022 SobelToPlaneRow = SobelToPlaneRow_SSE2;
2023 }
2024 #endif
2025 #if defined(HAS_SOBELTOPLANEROW_NEON)
2026 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
2027 SobelToPlaneRow = SobelToPlaneRow_NEON;
2028 }
2029 #endif
2030 return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y,
2031 width, height, SobelToPlaneRow);
2032 }
2034 // SobelXY ARGB effect.
2035 // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel.
2036 LIBYUV_API
2037 int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
2038 uint8* dst_argb, int dst_stride_argb,
2039 int width, int height) {
2040 void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
2041 uint8* dst_argb, int width) = SobelXYRow_C;
2042 #if defined(HAS_SOBELXYROW_SSE2)
2043 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
2044 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
2045 SobelXYRow = SobelXYRow_SSE2;
2046 }
2047 #endif
2048 #if defined(HAS_SOBELXYROW_NEON)
2049 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2050 SobelXYRow = SobelXYRow_NEON;
2051 }
2052 #endif
2053 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
2054 width, height, SobelXYRow);
2055 }
2057 // Apply a 4x4 polynomial to each ARGB pixel.
2058 LIBYUV_API
2059 int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
2060 uint8* dst_argb, int dst_stride_argb,
2061 const float* poly,
2062 int width, int height) {
2063 int y;
2064 void (*ARGBPolynomialRow)(const uint8* src_argb,
2065 uint8* dst_argb, const float* poly,
2066 int width) = ARGBPolynomialRow_C;
2067 if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
2068 return -1;
2069 }
2070 // Negative height means invert the image.
2071 if (height < 0) {
2072 height = -height;
2073 src_argb = src_argb + (height - 1) * src_stride_argb;
2074 src_stride_argb = -src_stride_argb;
2075 }
2076 // Coalesce rows.
2077 if (src_stride_argb == width * 4 &&
2078 dst_stride_argb == width * 4) {
2079 width *= height;
2080 height = 1;
2081 src_stride_argb = dst_stride_argb = 0;
2082 }
2083 #if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
2084 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
2085 ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
2086 }
2087 #endif
2088 #if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
2089 if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
2090 IS_ALIGNED(width, 2)) {
2091 ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
2092 }
2093 #endif
2095 for (y = 0; y < height; ++y) {
2096 ARGBPolynomialRow(src_argb, dst_argb, poly, width);
2097 src_argb += src_stride_argb;
2098 dst_argb += dst_stride_argb;
2099 }
2100 return 0;
2101 }
2103 // Apply a lumacolortable to each ARGB pixel.
2104 LIBYUV_API
2105 int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
2106 uint8* dst_argb, int dst_stride_argb,
2107 const uint8* luma,
2108 int width, int height) {
2109 int y;
2110 void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb,
2111 int width, const uint8* luma, const uint32 lumacoeff) =
2112 ARGBLumaColorTableRow_C;
2113 if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
2114 return -1;
2115 }
2116 // Negative height means invert the image.
2117 if (height < 0) {
2118 height = -height;
2119 src_argb = src_argb + (height - 1) * src_stride_argb;
2120 src_stride_argb = -src_stride_argb;
2121 }
2122 // Coalesce rows.
2123 if (src_stride_argb == width * 4 &&
2124 dst_stride_argb == width * 4) {
2125 width *= height;
2126 height = 1;
2127 src_stride_argb = dst_stride_argb = 0;
2128 }
2129 #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
2130 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
2131 ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
2132 }
2133 #endif
2135 for (y = 0; y < height; ++y) {
2136 ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
2137 src_argb += src_stride_argb;
2138 dst_argb += dst_stride_argb;
2139 }
2140 return 0;
2141 }
2143 // Copy Alpha from one ARGB image to another.
2144 LIBYUV_API
2145 int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
2146 uint8* dst_argb, int dst_stride_argb,
2147 int width, int height) {
2148 int y;
2149 void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) =
2150 ARGBCopyAlphaRow_C;
2151 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
2152 return -1;
2153 }
2154 // Negative height means invert the image.
2155 if (height < 0) {
2156 height = -height;
2157 src_argb = src_argb + (height - 1) * src_stride_argb;
2158 src_stride_argb = -src_stride_argb;
2159 }
2160 // Coalesce rows.
2161 if (src_stride_argb == width * 4 &&
2162 dst_stride_argb == width * 4) {
2163 width *= height;
2164 height = 1;
2165 src_stride_argb = dst_stride_argb = 0;
2166 }
2167 #if defined(HAS_ARGBCOPYALPHAROW_SSE2)
2168 if (TestCpuFlag(kCpuHasSSE2) &&
2169 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
2170 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
2171 IS_ALIGNED(width, 8)) {
2172 ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
2173 }
2174 #endif
2175 #if defined(HAS_ARGBCOPYALPHAROW_AVX2)
2176 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
2177 ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
2178 }
2179 #endif
2181 for (y = 0; y < height; ++y) {
2182 ARGBCopyAlphaRow(src_argb, dst_argb, width);
2183 src_argb += src_stride_argb;
2184 dst_argb += dst_stride_argb;
2185 }
2186 return 0;
2187 }
2189 // Copy a planar Y channel to the alpha channel of a destination ARGB image.
2190 LIBYUV_API
2191 int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
2192 uint8* dst_argb, int dst_stride_argb,
2193 int width, int height) {
2194 int y;
2195 void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
2196 ARGBCopyYToAlphaRow_C;
2197 if (!src_y || !dst_argb || width <= 0 || height == 0) {
2198 return -1;
2199 }
2200 // Negative height means invert the image.
2201 if (height < 0) {
2202 height = -height;
2203 src_y = src_y + (height - 1) * src_stride_y;
2204 src_stride_y = -src_stride_y;
2205 }
2206 // Coalesce rows.
2207 if (src_stride_y == width &&
2208 dst_stride_argb == width * 4) {
2209 width *= height;
2210 height = 1;
2211 src_stride_y = dst_stride_argb = 0;
2212 }
2213 #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
2214 if (TestCpuFlag(kCpuHasSSE2) &&
2215 IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
2216 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
2217 IS_ALIGNED(width, 8)) {
2218 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
2219 }
2220 #endif
2221 #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
2222 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
2223 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
2224 }
2225 #endif
2227 for (y = 0; y < height; ++y) {
2228 ARGBCopyYToAlphaRow(src_y, dst_argb, width);
2229 src_y += src_stride_y;
2230 dst_argb += dst_stride_argb;
2231 }
2232 return 0;
2233 }
2235 #ifdef __cplusplus
2236 } // extern "C"
2237 } // namespace libyuv
2238 #endif