/*
 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/planar_functions.h"

#include <string.h>  // for memset()

#include "libyuv/cpu_id.h"
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif
#include "libyuv/row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Copy a plane of data.
// Picks the fastest CopyRow kernel the CPU supports; the #if blocks below
// are ordered by priority — a later block deliberately overrides the
// selection made by an earlier one.
LIBYUV_API
void CopyPlane(const uint8* src_y, int src_stride_y,
               uint8* dst_y, int dst_stride_y,
               int width, int height) {
  int y;
  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
  // Coalesce rows: if both planes are contiguous (stride == width), treat
  // the whole image as a single long row so the copy loop runs once.
  if (src_stride_y == width &&
      dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }
#if defined(HAS_COPYROW_X86)
  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
    CopyRow = CopyRow_X86;
  }
#endif
#if defined(HAS_COPYROW_SSE2)
  // SSE2 kernel requires 16-byte aligned pointers and strides.
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    CopyRow = CopyRow_SSE2;
  }
#endif
#if defined(HAS_COPYROW_ERMS)
  // Enhanced REP MOVSB takes priority over SSE2 when available.
  if (TestCpuFlag(kCpuHasERMS)) {
    CopyRow = CopyRow_ERMS;
  }
#endif
#if defined(HAS_COPYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
    CopyRow = CopyRow_NEON;
  }
#endif
#if defined(HAS_COPYROW_MIPS)
  if (TestCpuFlag(kCpuHasMIPS)) {
    CopyRow = CopyRow_MIPS;
  }
#endif

  // Copy plane
  for (y = 0; y < height; ++y) {
    CopyRow(src_y, dst_y, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}

// Copy I422.
// I422 chroma planes are half width but full height, hence the halfwidth
// copies below with the unmodified height.
// Returns 0 on success, -1 on invalid pointers or dimensions.
LIBYUV_API
int I422Copy(const uint8* src_y, int src_stride_y,
             const uint8* src_u, int src_stride_u,
             const uint8* src_v, int src_stride_v,
             uint8* dst_y, int dst_stride_y,
             uint8* dst_u, int dst_stride_u,
             uint8* dst_v, int dst_stride_v,
             int width, int height) {
  int halfwidth = (width + 1) >> 1;  // round up for odd widths
  if (!src_y || !src_u || !src_v ||
      !dst_y || !dst_u || !dst_v ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image: start at the last source row
  // and walk upward with negated strides.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (height - 1) * src_stride_u;
    src_v = src_v + (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }
  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
  return 0;
}

// Copy I444.
// All three planes are full resolution in I444.
// Returns 0 on success, -1 on invalid pointers or dimensions.
LIBYUV_API
int I444Copy(const uint8* src_y, int src_stride_y,
             const uint8* src_u, int src_stride_u,
             const uint8* src_v, int src_stride_v,
             uint8* dst_y, int dst_stride_y,
             uint8* dst_u, int dst_stride_u,
             uint8* dst_v, int dst_stride_v,
             int width, int height) {
  if (!src_y || !src_u || !src_v ||
      !dst_y || !dst_u || !dst_v ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (height - 1) * src_stride_u;
    src_v = src_v + (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
  return 0;
}

// Copy I400.
// Copy a Y-only (I400) image; thin wrapper over CopyPlane with the
// standard negative-height flip convention.
// Returns 0 on success, -1 on invalid pointers or dimensions.
LIBYUV_API
int I400ToI400(const uint8* src_y, int src_stride_y,
               uint8* dst_y, int dst_stride_y,
               int width, int height) {
  if (!src_y || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  return 0;
}

// Convert I420 to I400.
// Only the Y plane is copied; the src_u/src_v arguments are accepted for
// API symmetry but never read here — chroma is simply discarded.
LIBYUV_API
int I420ToI400(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_y, int dst_stride_y,
               int width, int height) {
  if (!src_y || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  return 0;
}

// Mirror a plane of data (horizontal flip).
// Not LIBYUV_API: internal helper shared by the I400/I420 mirror entry
// points below. Later #if blocks override earlier kernel selections.
void MirrorPlane(const uint8* src_y, int src_stride_y,
                 uint8* dst_y, int dst_stride_y,
                 int width, int height) {
  int y;
  void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
#if defined(HAS_MIRRORROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
    MirrorRow = MirrorRow_NEON;
  }
#endif
#if defined(HAS_MIRRORROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
    MirrorRow = MirrorRow_SSE2;
  }
#endif
#if defined(HAS_MIRRORROW_SSSE3)
  // SSSE3 kernel additionally requires 16-byte aligned pointers/strides.
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    MirrorRow = MirrorRow_SSSE3;
  }
#endif
#if defined(HAS_MIRRORROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
    MirrorRow = MirrorRow_AVX2;
  }
#endif

  // Mirror plane
  for (y = 0; y < height; ++y) {
    MirrorRow(src_y, dst_y, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}

// Convert YUY2 to I422.
// Splits packed YUY2 (Y0 U Y1 V) into planar Y plus half-width U/V rows;
// every output row gets its own chroma row (4:2:2, no vertical subsample).
// Returns 0 on success, -1 on invalid pointers or dimensions.
LIBYUV_API
int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  int y;
  void (*YUY2ToUV422Row)(const uint8* src_yuy2,
                         uint8* dst_u, uint8* dst_v, int pix) =
      YUY2ToUV422Row_C;
  void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
      YUY2ToYRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
    src_stride_yuy2 = -src_stride_yuy2;
  }
  // Coalesce rows: all four surfaces contiguous -> process as one row.
  if (src_stride_yuy2 == width * 2 &&
      dst_stride_y == width &&
      dst_stride_u * 2 == width &&
      dst_stride_v * 2 == width) {
    width *= height;
    height = 1;
    src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
  }
#if defined(HAS_YUY2TOYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
    YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
    YUY2ToYRow = YUY2ToYRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
      YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
      if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
        YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
          YUY2ToYRow = YUY2ToYRow_SSE2;
        }
      }
    }
  }
#endif
#if defined(HAS_YUY2TOYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
    YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
    YUY2ToYRow = YUY2ToYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
      YUY2ToYRow = YUY2ToYRow_AVX2;
    }
  }
#endif
#if defined(HAS_YUY2TOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    YUY2ToYRow = YUY2ToYRow_Any_NEON;
    if (width >= 16) {
      YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
    }
    if (IS_ALIGNED(width, 16)) {
      YUY2ToYRow = YUY2ToYRow_NEON;
      YUY2ToUV422Row = YUY2ToUV422Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
    YUY2ToYRow(src_yuy2, dst_y, width);
    src_yuy2 += src_stride_yuy2;
    dst_y += dst_stride_y;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  return 0;
}

// Convert UYVY to I422.
// Splits packed UYVY (U Y0 V Y1) into planar Y plus half-width U/V rows;
// structurally identical to YUY2ToI422 but with the UYVY byte order.
// Returns 0 on success, -1 on invalid pointers or dimensions.
LIBYUV_API
int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  int y;
  void (*UYVYToUV422Row)(const uint8* src_uyvy,
                         uint8* dst_u, uint8* dst_v, int pix) =
      UYVYToUV422Row_C;
  void (*UYVYToYRow)(const uint8* src_uyvy,
                     uint8* dst_y, int pix) = UYVYToYRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
    src_stride_uyvy = -src_stride_uyvy;
  }
  // Coalesce rows: all four surfaces contiguous -> process as one row.
  if (src_stride_uyvy == width * 2 &&
      dst_stride_y == width &&
      dst_stride_u * 2 == width &&
      dst_stride_v * 2 == width) {
    width *= height;
    height = 1;
    src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
  }
#if defined(HAS_UYVYTOYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
    UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
    UYVYToYRow = UYVYToYRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2;
      UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
      if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
        UYVYToUV422Row = UYVYToUV422Row_SSE2;
        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
          UYVYToYRow = UYVYToYRow_SSE2;
        }
      }
    }
  }
#endif
#if defined(HAS_UYVYTOYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
    UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
    UYVYToYRow = UYVYToYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      UYVYToUV422Row = UYVYToUV422Row_AVX2;
      UYVYToYRow = UYVYToYRow_AVX2;
    }
  }
#endif
#if defined(HAS_UYVYTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    UYVYToYRow = UYVYToYRow_Any_NEON;
    if (width >= 16) {
      UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
    }
    if (IS_ALIGNED(width, 16)) {
      UYVYToYRow = UYVYToYRow_NEON;
      UYVYToUV422Row = UYVYToUV422Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
    UYVYToYRow(src_uyvy, dst_y, width);
    src_uyvy += src_stride_uyvy;
    dst_y += dst_stride_y;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  return 0;
}

// Mirror I400 with optional flipping (negative height flips vertically,
// MirrorPlane does the horizontal mirror).
// Returns 0 on success, -1 on invalid pointers or dimensions.
LIBYUV_API
int I400Mirror(const uint8* src_y, int src_stride_y,
               uint8* dst_y, int dst_stride_y,
               int width, int height) {
  if (!src_y || !dst_y ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }

  MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  return 0;
}

// Mirror I420 with optional flipping.
// Chroma planes are half width and half height (4:2:0), hence the
// halfwidth/halfheight mirrors below.
// Returns 0 on success, -1 on invalid pointers or dimensions.
LIBYUV_API
int I420Mirror(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
  if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;  // recompute after negation
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (halfheight - 1) * src_stride_u;
    src_v = src_v + (halfheight - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  // NOTE(review): dst_y was already rejected as NULL above, so this guard
  // is redundant as written — possibly intended to permit a NULL dst_y
  // (mirror chroma only). Confirm against upstream before relying on it.
  if (dst_y) {
    MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
  MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
  return 0;
}

// ARGB mirror (horizontal flip of 4-byte pixels).
// Returns 0 on success, -1 on invalid pointers or dimensions.
LIBYUV_API
int ARGBMirror(const uint8* src_argb, int src_stride_argb,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  int y;
  void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
      ARGBMirrorRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }

#if defined(HAS_ARGBMIRRORROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBMirrorRow = ARGBMirrorRow_SSSE3;
  }
#endif
#if defined(HAS_ARGBMIRRORROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
    ARGBMirrorRow = ARGBMirrorRow_AVX2;
  }
#endif
#if defined(HAS_ARGBMIRRORROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
    ARGBMirrorRow = ARGBMirrorRow_NEON;
  }
#endif

  // Mirror plane
  for (y = 0; y < height; ++y) {
    ARGBMirrorRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Get a blender that optimized for the CPU, alignment and pixel count.
// As there are 6 blenders to choose from, the caller should try to use
// the same blend function for all pixels if possible.
// Returns a pointer to the best ARGBBlendRow kernel for this CPU.
// SSSE3 returns immediately (highest priority); otherwise SSE2 and then
// NEON may each override the C fallback.
LIBYUV_API
ARGBBlendRow GetARGBBlend() {
  void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
                       uint8* dst_argb, int width) = ARGBBlendRow_C;
#if defined(HAS_ARGBBLENDROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBBlendRow = ARGBBlendRow_SSSE3;
    return ARGBBlendRow;  // early return: SSSE3 beats all other choices
  }
#endif
#if defined(HAS_ARGBBLENDROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBBlendRow = ARGBBlendRow_SSE2;
  }
#endif
#if defined(HAS_ARGBBLENDROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBBlendRow = ARGBBlendRow_NEON;
  }
#endif
  return ARGBBlendRow;
}

// Alpha Blend 2 ARGB images and store to destination.
// Note: negative height flips by writing destination rows bottom-up
// (sources are read top-down), unlike the copy/mirror functions above
// which invert the source.
// Returns 0 on success, -1 on invalid pointers or dimensions.
LIBYUV_API
int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
              const uint8* src_argb1, int src_stride_argb1,
              uint8* dst_argb, int dst_stride_argb,
              int width, int height) {
  int y;
  void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
                       uint8* dst_argb, int width) = GetARGBBlend();
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows: all three surfaces contiguous -> process as one row.
  if (src_stride_argb0 == width * 4 &&
      src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }

  for (y = 0; y < height; ++y) {
    ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Multiply 2 ARGB images and store to destination.
// Returns 0 on success, -1 on invalid pointers or dimensions.
LIBYUV_API
int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
                 const uint8* src_argb1, int src_stride_argb1,
                 uint8* dst_argb, int dst_stride_argb,
                 int width, int height) {
  int y;
  void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
                          int width) = ARGBMultiplyRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image (destination written bottom-up).
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows.
  if (src_stride_argb0 == width * 4 &&
      src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBMULTIPLYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBMULTIPLYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBMULTIPLYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBMultiplyRow = ARGBMultiplyRow_NEON;
    }
  }
#endif

  // Multiply plane
  for (y = 0; y < height; ++y) {
    ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Add 2 ARGB images and store to destination.
// Returns 0 on success, -1 on invalid pointers or dimensions.
LIBYUV_API
int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
            const uint8* src_argb1, int src_stride_argb1,
            uint8* dst_argb, int dst_stride_argb,
            int width, int height) {
  int y;
  void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
                     int width) = ARGBAddRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image (destination written bottom-up).
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows.
  if (src_stride_argb0 == width * 4 &&
      src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
// MSVC builds use the SSE2 kernel unconditionally (no width guard or
// Any_ variant); other compilers use the guarded Any_/aligned selection.
#if defined(HAS_ARGBADDROW_SSE2) && defined(_MSC_VER)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBAddRow = ARGBAddRow_SSE2;
  }
#endif
#if defined(HAS_ARGBADDROW_SSE2) && !defined(_MSC_VER)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
    ARGBAddRow = ARGBAddRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBAddRow = ARGBAddRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBADDROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
    ARGBAddRow = ARGBAddRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBAddRow = ARGBAddRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBADDROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ARGBAddRow = ARGBAddRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBAddRow = ARGBAddRow_NEON;
    }
  }
#endif

  // Add plane
  for (y = 0; y < height; ++y) {
    ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Subtract 2 ARGB images and store to destination.
// Subtract src_argb1 from src_argb0 per pixel and store to destination.
// Returns 0 on success, -1 on invalid pointers or dimensions.
LIBYUV_API
int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
                 const uint8* src_argb1, int src_stride_argb1,
                 uint8* dst_argb, int dst_stride_argb,
                 int width, int height) {
  int y;
  void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst,
                          int width) = ARGBSubtractRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image (destination written bottom-up).
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows.
  if (src_stride_argb0 == width * 4 &&
      src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBSUBTRACTROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
    ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBSubtractRow = ARGBSubtractRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBSUBTRACTROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
    ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBSubtractRow = ARGBSubtractRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBSUBTRACTROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBSubtractRow = ARGBSubtractRow_NEON;
    }
  }
#endif

  // Subtract plane
  for (y = 0; y < height; ++y) {
    ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}

// Convert I422 to BGRA.
// Note: this dispatch uses an #if/#elif chain, so at most one backend's
// selection code is compiled in (NEON takes precedence over SSSE3/MIPS).
// Returns 0 on success, -1 on invalid pointers or dimensions.
LIBYUV_API
int I422ToBGRA(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_bgra, int dst_stride_bgra,
               int width, int height) {
  int y;
  void (*I422ToBGRARow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I422ToBGRARow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_bgra ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image (destination written bottom-up).
  if (height < 0) {
    height = -height;
    dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
    dst_stride_bgra = -dst_stride_bgra;
  }
  // Coalesce rows.
  if (src_stride_y == width &&
      src_stride_u * 2 == width &&
      src_stride_v * 2 == width &&
      dst_stride_bgra == width * 4) {
    width *= height;
    height = 1;
    src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0;
  }
#if defined(HAS_I422TOBGRAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToBGRARow = I422ToBGRARow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      I422ToBGRARow = I422ToBGRARow_NEON;
    }
  }
#elif defined(HAS_I422TOBGRAROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
        I422ToBGRARow = I422ToBGRARow_SSSE3;
      }
    }
  }
#elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
    I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
  }
#endif

  for (y = 0; y < height; ++y) {
    I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
    dst_bgra += dst_stride_bgra;
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}

// Convert I422 to ABGR.
// Returns 0 on success, -1 on invalid pointers or dimensions.
LIBYUV_API
int I422ToABGR(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_abgr, int dst_stride_abgr,
               int width, int height) {
  int y;
  void (*I422ToABGRRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I422ToABGRRow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_abgr ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image (destination written bottom-up).
  if (height < 0) {
    height = -height;
    dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
    dst_stride_abgr = -dst_stride_abgr;
  }
  // Coalesce rows.
  if (src_stride_y == width &&
      src_stride_u * 2 == width &&
      src_stride_v * 2 == width &&
      dst_stride_abgr == width * 4) {
    width *= height;
    height = 1;
    src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
  }
#if defined(HAS_I422TOABGRROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToABGRRow = I422ToABGRRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      I422ToABGRRow = I422ToABGRRow_NEON;
    }
  }
#elif defined(HAS_I422TOABGRROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
        I422ToABGRRow = I422ToABGRRow_SSSE3;
      }
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
    dst_abgr += dst_stride_abgr;
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}

// Convert I422 to RGBA.
// Returns 0 on success, -1 on invalid pointers or dimensions.
LIBYUV_API
int I422ToRGBA(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_rgba, int dst_stride_rgba,
               int width, int height) {
  int y;
  void (*I422ToRGBARow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I422ToRGBARow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_rgba ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image (destination written bottom-up).
  if (height < 0) {
    height = -height;
    dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
    dst_stride_rgba = -dst_stride_rgba;
  }
  // Coalesce rows.
  if (src_stride_y == width &&
      src_stride_u * 2 == width &&
      src_stride_v * 2 == width &&
      dst_stride_rgba == width * 4) {
    width *= height;
    height = 1;
    src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0;
  }
#if defined(HAS_I422TORGBAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToRGBARow = I422ToRGBARow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      I422ToRGBARow = I422ToRGBARow_NEON;
    }
  }
#elif defined(HAS_I422TORGBAROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
        I422ToRGBARow = I422ToRGBARow_SSSE3;
      }
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
    dst_rgba += dst_stride_rgba;
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}

// Convert NV12 to RGB565.
// The interleaved UV plane is advanced only every second row (see the
// `y & 1` test in the loop), matching NV12's vertical chroma subsampling.
// Returns 0 on success, -1 on invalid pointers or dimensions.
LIBYUV_API
int NV12ToRGB565(const uint8* src_y, int src_stride_y,
                 const uint8* src_uv, int src_stride_uv,
                 uint8* dst_rgb565, int dst_stride_rgb565,
                 int width, int height) {
  int y;
  void (*NV12ToRGB565Row)(const uint8* y_buf,
                          const uint8* uv_buf,
                          uint8* rgb_buf,
                          int width) = NV12ToRGB565Row_C;
  if (!src_y || !src_uv || !dst_rgb565 ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image (destination written bottom-up).
  if (height < 0) {
    height = -height;
    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
    dst_stride_rgb565 = -dst_stride_rgb565;
  }
#if defined(HAS_NV12TORGB565ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
    }
  }
#elif defined(HAS_NV12TORGB565ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      NV12ToRGB565Row = NV12ToRGB565Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width);
    dst_rgb565 += dst_stride_rgb565;
    src_y += src_stride_y;
    if (y & 1) {
      src_uv += src_stride_uv;  // advance UV once per pair of Y rows
    }
  }
  return 0;
}

// Convert NV21 to RGB565.
// Identical structure to NV12ToRGB565, with VU-ordered chroma.
// Returns 0 on success, -1 on invalid pointers or dimensions.
LIBYUV_API
int NV21ToRGB565(const uint8* src_y, int src_stride_y,
                 const uint8* src_vu, int src_stride_vu,
                 uint8* dst_rgb565, int dst_stride_rgb565,
                 int width, int height) {
  int y;
  void (*NV21ToRGB565Row)(const uint8* y_buf,
                          const uint8* src_vu,
                          uint8* rgb_buf,
                          int width) = NV21ToRGB565Row_C;
  if (!src_y || !src_vu || !dst_rgb565 ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image (destination written bottom-up).
  if (height < 0) {
    height = -height;
    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
    dst_stride_rgb565 = -dst_stride_rgb565;
  }
#if defined(HAS_NV21TORGB565ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      NV21ToRGB565Row = NV21ToRGB565Row_SSSE3;
    }
  }
#elif defined(HAS_NV21TORGB565ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      NV21ToRGB565Row = NV21ToRGB565Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width);
    dst_rgb565 += dst_stride_rgb565;
    src_y += src_stride_y;
    if (y & 1) {
      src_vu += src_stride_vu;  // advance VU once per pair of Y rows
    }
  }
  return 0;
}

// Set a plane of data to a constant byte value.
// v32 replicates the low byte of `value` into all four bytes for the
// word-at-a-time SetRow kernels.
LIBYUV_API
void SetPlane(uint8* dst_y, int dst_stride_y,
              int width, int height,
              uint32 value) {
  int y;
  uint32 v32 = value | (value << 8) | (value << 16) | (value << 24);
  void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C;
  // Coalesce rows.
1.1026 + if (dst_stride_y == width) { 1.1027 + width *= height; 1.1028 + height = 1; 1.1029 + dst_stride_y = 0; 1.1030 + } 1.1031 +#if defined(HAS_SETROW_NEON) 1.1032 + if (TestCpuFlag(kCpuHasNEON) && 1.1033 + IS_ALIGNED(width, 16) && 1.1034 + IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { 1.1035 + SetRow = SetRow_NEON; 1.1036 + } 1.1037 +#endif 1.1038 +#if defined(HAS_SETROW_X86) 1.1039 + if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) { 1.1040 + SetRow = SetRow_X86; 1.1041 + } 1.1042 +#endif 1.1043 + 1.1044 + // Set plane 1.1045 + for (y = 0; y < height; ++y) { 1.1046 + SetRow(dst_y, v32, width); 1.1047 + dst_y += dst_stride_y; 1.1048 + } 1.1049 +} 1.1050 + 1.1051 +// Draw a rectangle into I420 1.1052 +LIBYUV_API 1.1053 +int I420Rect(uint8* dst_y, int dst_stride_y, 1.1054 + uint8* dst_u, int dst_stride_u, 1.1055 + uint8* dst_v, int dst_stride_v, 1.1056 + int x, int y, 1.1057 + int width, int height, 1.1058 + int value_y, int value_u, int value_v) { 1.1059 + int halfwidth = (width + 1) >> 1; 1.1060 + int halfheight = (height + 1) >> 1; 1.1061 + uint8* start_y = dst_y + y * dst_stride_y + x; 1.1062 + uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2); 1.1063 + uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2); 1.1064 + if (!dst_y || !dst_u || !dst_v || 1.1065 + width <= 0 || height <= 0 || 1.1066 + x < 0 || y < 0 || 1.1067 + value_y < 0 || value_y > 255 || 1.1068 + value_u < 0 || value_u > 255 || 1.1069 + value_v < 0 || value_v > 255) { 1.1070 + return -1; 1.1071 + } 1.1072 + 1.1073 + SetPlane(start_y, dst_stride_y, width, height, value_y); 1.1074 + SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u); 1.1075 + SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v); 1.1076 + return 0; 1.1077 +} 1.1078 + 1.1079 +// Draw a rectangle into ARGB 1.1080 +LIBYUV_API 1.1081 +int ARGBRect(uint8* dst_argb, int dst_stride_argb, 1.1082 + int dst_x, int dst_y, 1.1083 + int width, int height, 1.1084 + uint32 value) { 1.1085 + if 
(!dst_argb || 1.1086 + width <= 0 || height <= 0 || 1.1087 + dst_x < 0 || dst_y < 0) { 1.1088 + return -1; 1.1089 + } 1.1090 + dst_argb += dst_y * dst_stride_argb + dst_x * 4; 1.1091 + // Coalesce rows. 1.1092 + if (dst_stride_argb == width * 4) { 1.1093 + width *= height; 1.1094 + height = 1; 1.1095 + dst_stride_argb = 0; 1.1096 + } 1.1097 +#if defined(HAS_SETROW_NEON) 1.1098 + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) && 1.1099 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1.1100 + ARGBSetRows_NEON(dst_argb, value, width, dst_stride_argb, height); 1.1101 + return 0; 1.1102 + } 1.1103 +#endif 1.1104 +#if defined(HAS_SETROW_X86) 1.1105 + if (TestCpuFlag(kCpuHasX86)) { 1.1106 + ARGBSetRows_X86(dst_argb, value, width, dst_stride_argb, height); 1.1107 + return 0; 1.1108 + } 1.1109 +#endif 1.1110 + ARGBSetRows_C(dst_argb, value, width, dst_stride_argb, height); 1.1111 + return 0; 1.1112 +} 1.1113 + 1.1114 +// Convert unattentuated ARGB to preattenuated ARGB. 1.1115 +// An unattenutated ARGB alpha blend uses the formula 1.1116 +// p = a * f + (1 - a) * b 1.1117 +// where 1.1118 +// p is output pixel 1.1119 +// f is foreground pixel 1.1120 +// b is background pixel 1.1121 +// a is alpha value from foreground pixel 1.1122 +// An preattenutated ARGB alpha blend uses the formula 1.1123 +// p = f + (1 - a) * b 1.1124 +// where 1.1125 +// f is foreground pixel premultiplied by alpha 1.1126 + 1.1127 +LIBYUV_API 1.1128 +int ARGBAttenuate(const uint8* src_argb, int src_stride_argb, 1.1129 + uint8* dst_argb, int dst_stride_argb, 1.1130 + int width, int height) { 1.1131 + int y; 1.1132 + void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb, 1.1133 + int width) = ARGBAttenuateRow_C; 1.1134 + if (!src_argb || !dst_argb || width <= 0 || height == 0) { 1.1135 + return -1; 1.1136 + } 1.1137 + if (height < 0) { 1.1138 + height = -height; 1.1139 + src_argb = src_argb + (height - 1) * src_stride_argb; 1.1140 + src_stride_argb = -src_stride_argb; 
1.1141 + } 1.1142 + // Coalesce rows. 1.1143 + if (src_stride_argb == width * 4 && 1.1144 + dst_stride_argb == width * 4) { 1.1145 + width *= height; 1.1146 + height = 1; 1.1147 + src_stride_argb = dst_stride_argb = 0; 1.1148 + } 1.1149 +#if defined(HAS_ARGBATTENUATEROW_SSE2) 1.1150 + if (TestCpuFlag(kCpuHasSSE2) && width >= 4 && 1.1151 + IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && 1.1152 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1.1153 + ARGBAttenuateRow = ARGBAttenuateRow_Any_SSE2; 1.1154 + if (IS_ALIGNED(width, 4)) { 1.1155 + ARGBAttenuateRow = ARGBAttenuateRow_SSE2; 1.1156 + } 1.1157 + } 1.1158 +#endif 1.1159 +#if defined(HAS_ARGBATTENUATEROW_SSSE3) 1.1160 + if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) { 1.1161 + ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3; 1.1162 + if (IS_ALIGNED(width, 4)) { 1.1163 + ARGBAttenuateRow = ARGBAttenuateRow_SSSE3; 1.1164 + } 1.1165 + } 1.1166 +#endif 1.1167 +#if defined(HAS_ARGBATTENUATEROW_AVX2) 1.1168 + if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { 1.1169 + ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2; 1.1170 + if (IS_ALIGNED(width, 8)) { 1.1171 + ARGBAttenuateRow = ARGBAttenuateRow_AVX2; 1.1172 + } 1.1173 + } 1.1174 +#endif 1.1175 +#if defined(HAS_ARGBATTENUATEROW_NEON) 1.1176 + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { 1.1177 + ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON; 1.1178 + if (IS_ALIGNED(width, 8)) { 1.1179 + ARGBAttenuateRow = ARGBAttenuateRow_NEON; 1.1180 + } 1.1181 + } 1.1182 +#endif 1.1183 + 1.1184 + for (y = 0; y < height; ++y) { 1.1185 + ARGBAttenuateRow(src_argb, dst_argb, width); 1.1186 + src_argb += src_stride_argb; 1.1187 + dst_argb += dst_stride_argb; 1.1188 + } 1.1189 + return 0; 1.1190 +} 1.1191 + 1.1192 +// Convert preattentuated ARGB to unattenuated ARGB. 
1.1193 +LIBYUV_API 1.1194 +int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, 1.1195 + uint8* dst_argb, int dst_stride_argb, 1.1196 + int width, int height) { 1.1197 + int y; 1.1198 + void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb, 1.1199 + int width) = ARGBUnattenuateRow_C; 1.1200 + if (!src_argb || !dst_argb || width <= 0 || height == 0) { 1.1201 + return -1; 1.1202 + } 1.1203 + if (height < 0) { 1.1204 + height = -height; 1.1205 + src_argb = src_argb + (height - 1) * src_stride_argb; 1.1206 + src_stride_argb = -src_stride_argb; 1.1207 + } 1.1208 + // Coalesce rows. 1.1209 + if (src_stride_argb == width * 4 && 1.1210 + dst_stride_argb == width * 4) { 1.1211 + width *= height; 1.1212 + height = 1; 1.1213 + src_stride_argb = dst_stride_argb = 0; 1.1214 + } 1.1215 +#if defined(HAS_ARGBUNATTENUATEROW_SSE2) 1.1216 + if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { 1.1217 + ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2; 1.1218 + if (IS_ALIGNED(width, 4)) { 1.1219 + ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2; 1.1220 + } 1.1221 + } 1.1222 +#endif 1.1223 +#if defined(HAS_ARGBUNATTENUATEROW_AVX2) 1.1224 + if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { 1.1225 + ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2; 1.1226 + if (IS_ALIGNED(width, 8)) { 1.1227 + ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2; 1.1228 + } 1.1229 + } 1.1230 +#endif 1.1231 +// TODO(fbarchard): Neon version. 1.1232 + 1.1233 + for (y = 0; y < height; ++y) { 1.1234 + ARGBUnattenuateRow(src_argb, dst_argb, width); 1.1235 + src_argb += src_stride_argb; 1.1236 + dst_argb += dst_stride_argb; 1.1237 + } 1.1238 + return 0; 1.1239 +} 1.1240 + 1.1241 +// Convert ARGB to Grayed ARGB. 
1.1242 +LIBYUV_API 1.1243 +int ARGBGrayTo(const uint8* src_argb, int src_stride_argb, 1.1244 + uint8* dst_argb, int dst_stride_argb, 1.1245 + int width, int height) { 1.1246 + int y; 1.1247 + void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, 1.1248 + int width) = ARGBGrayRow_C; 1.1249 + if (!src_argb || !dst_argb || width <= 0 || height == 0) { 1.1250 + return -1; 1.1251 + } 1.1252 + if (height < 0) { 1.1253 + height = -height; 1.1254 + src_argb = src_argb + (height - 1) * src_stride_argb; 1.1255 + src_stride_argb = -src_stride_argb; 1.1256 + } 1.1257 + // Coalesce rows. 1.1258 + if (src_stride_argb == width * 4 && 1.1259 + dst_stride_argb == width * 4) { 1.1260 + width *= height; 1.1261 + height = 1; 1.1262 + src_stride_argb = dst_stride_argb = 0; 1.1263 + } 1.1264 +#if defined(HAS_ARGBGRAYROW_SSSE3) 1.1265 + if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && 1.1266 + IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && 1.1267 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1.1268 + ARGBGrayRow = ARGBGrayRow_SSSE3; 1.1269 + } 1.1270 +#elif defined(HAS_ARGBGRAYROW_NEON) 1.1271 + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 1.1272 + ARGBGrayRow = ARGBGrayRow_NEON; 1.1273 + } 1.1274 +#endif 1.1275 + 1.1276 + for (y = 0; y < height; ++y) { 1.1277 + ARGBGrayRow(src_argb, dst_argb, width); 1.1278 + src_argb += src_stride_argb; 1.1279 + dst_argb += dst_stride_argb; 1.1280 + } 1.1281 + return 0; 1.1282 +} 1.1283 + 1.1284 +// Make a rectangle of ARGB gray scale. 
1.1285 +LIBYUV_API 1.1286 +int ARGBGray(uint8* dst_argb, int dst_stride_argb, 1.1287 + int dst_x, int dst_y, 1.1288 + int width, int height) { 1.1289 + int y; 1.1290 + void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, 1.1291 + int width) = ARGBGrayRow_C; 1.1292 + uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; 1.1293 + if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { 1.1294 + return -1; 1.1295 + } 1.1296 + // Coalesce rows. 1.1297 + if (dst_stride_argb == width * 4) { 1.1298 + width *= height; 1.1299 + height = 1; 1.1300 + dst_stride_argb = 0; 1.1301 + } 1.1302 +#if defined(HAS_ARGBGRAYROW_SSSE3) 1.1303 + if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && 1.1304 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1.1305 + ARGBGrayRow = ARGBGrayRow_SSSE3; 1.1306 + } 1.1307 +#elif defined(HAS_ARGBGRAYROW_NEON) 1.1308 + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 1.1309 + ARGBGrayRow = ARGBGrayRow_NEON; 1.1310 + } 1.1311 +#endif 1.1312 + for (y = 0; y < height; ++y) { 1.1313 + ARGBGrayRow(dst, dst, width); 1.1314 + dst += dst_stride_argb; 1.1315 + } 1.1316 + return 0; 1.1317 +} 1.1318 + 1.1319 +// Make a rectangle of ARGB Sepia tone. 1.1320 +LIBYUV_API 1.1321 +int ARGBSepia(uint8* dst_argb, int dst_stride_argb, 1.1322 + int dst_x, int dst_y, int width, int height) { 1.1323 + int y; 1.1324 + void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C; 1.1325 + uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; 1.1326 + if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { 1.1327 + return -1; 1.1328 + } 1.1329 + // Coalesce rows. 
1.1330 + if (dst_stride_argb == width * 4) { 1.1331 + width *= height; 1.1332 + height = 1; 1.1333 + dst_stride_argb = 0; 1.1334 + } 1.1335 +#if defined(HAS_ARGBSEPIAROW_SSSE3) 1.1336 + if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && 1.1337 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1.1338 + ARGBSepiaRow = ARGBSepiaRow_SSSE3; 1.1339 + } 1.1340 +#elif defined(HAS_ARGBSEPIAROW_NEON) 1.1341 + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 1.1342 + ARGBSepiaRow = ARGBSepiaRow_NEON; 1.1343 + } 1.1344 +#endif 1.1345 + for (y = 0; y < height; ++y) { 1.1346 + ARGBSepiaRow(dst, width); 1.1347 + dst += dst_stride_argb; 1.1348 + } 1.1349 + return 0; 1.1350 +} 1.1351 + 1.1352 +// Apply a 4x4 matrix to each ARGB pixel. 1.1353 +// Note: Normally for shading, but can be used to swizzle or invert. 1.1354 +LIBYUV_API 1.1355 +int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb, 1.1356 + uint8* dst_argb, int dst_stride_argb, 1.1357 + const int8* matrix_argb, 1.1358 + int width, int height) { 1.1359 + int y; 1.1360 + void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb, 1.1361 + const int8* matrix_argb, int width) = ARGBColorMatrixRow_C; 1.1362 + if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) { 1.1363 + return -1; 1.1364 + } 1.1365 + if (height < 0) { 1.1366 + height = -height; 1.1367 + src_argb = src_argb + (height - 1) * src_stride_argb; 1.1368 + src_stride_argb = -src_stride_argb; 1.1369 + } 1.1370 + // Coalesce rows. 
1.1371 + if (src_stride_argb == width * 4 && 1.1372 + dst_stride_argb == width * 4) { 1.1373 + width *= height; 1.1374 + height = 1; 1.1375 + src_stride_argb = dst_stride_argb = 0; 1.1376 + } 1.1377 +#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3) 1.1378 + if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && 1.1379 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1.1380 + ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3; 1.1381 + } 1.1382 +#elif defined(HAS_ARGBCOLORMATRIXROW_NEON) 1.1383 + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 1.1384 + ARGBColorMatrixRow = ARGBColorMatrixRow_NEON; 1.1385 + } 1.1386 +#endif 1.1387 + for (y = 0; y < height; ++y) { 1.1388 + ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width); 1.1389 + src_argb += src_stride_argb; 1.1390 + dst_argb += dst_stride_argb; 1.1391 + } 1.1392 + return 0; 1.1393 +} 1.1394 + 1.1395 +// Apply a 4x3 matrix to each ARGB pixel. 1.1396 +// Deprecated. 1.1397 +LIBYUV_API 1.1398 +int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb, 1.1399 + const int8* matrix_rgb, 1.1400 + int dst_x, int dst_y, int width, int height) { 1.1401 + SIMD_ALIGNED(int8 matrix_argb[16]); 1.1402 + uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; 1.1403 + if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 || 1.1404 + dst_x < 0 || dst_y < 0) { 1.1405 + return -1; 1.1406 + } 1.1407 + 1.1408 + // Convert 4x3 7 bit matrix to 4x4 6 bit matrix. 
1.1409 + matrix_argb[0] = matrix_rgb[0] / 2; 1.1410 + matrix_argb[1] = matrix_rgb[1] / 2; 1.1411 + matrix_argb[2] = matrix_rgb[2] / 2; 1.1412 + matrix_argb[3] = matrix_rgb[3] / 2; 1.1413 + matrix_argb[4] = matrix_rgb[4] / 2; 1.1414 + matrix_argb[5] = matrix_rgb[5] / 2; 1.1415 + matrix_argb[6] = matrix_rgb[6] / 2; 1.1416 + matrix_argb[7] = matrix_rgb[7] / 2; 1.1417 + matrix_argb[8] = matrix_rgb[8] / 2; 1.1418 + matrix_argb[9] = matrix_rgb[9] / 2; 1.1419 + matrix_argb[10] = matrix_rgb[10] / 2; 1.1420 + matrix_argb[11] = matrix_rgb[11] / 2; 1.1421 + matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0; 1.1422 + matrix_argb[15] = 64; // 1.0 1.1423 + 1.1424 + return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb, 1.1425 + dst, dst_stride_argb, 1.1426 + &matrix_argb[0], width, height); 1.1427 +} 1.1428 + 1.1429 +// Apply a color table each ARGB pixel. 1.1430 +// Table contains 256 ARGB values. 1.1431 +LIBYUV_API 1.1432 +int ARGBColorTable(uint8* dst_argb, int dst_stride_argb, 1.1433 + const uint8* table_argb, 1.1434 + int dst_x, int dst_y, int width, int height) { 1.1435 + int y; 1.1436 + void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb, 1.1437 + int width) = ARGBColorTableRow_C; 1.1438 + uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; 1.1439 + if (!dst_argb || !table_argb || width <= 0 || height <= 0 || 1.1440 + dst_x < 0 || dst_y < 0) { 1.1441 + return -1; 1.1442 + } 1.1443 + // Coalesce rows. 
1.1444 + if (dst_stride_argb == width * 4) { 1.1445 + width *= height; 1.1446 + height = 1; 1.1447 + dst_stride_argb = 0; 1.1448 + } 1.1449 +#if defined(HAS_ARGBCOLORTABLEROW_X86) 1.1450 + if (TestCpuFlag(kCpuHasX86)) { 1.1451 + ARGBColorTableRow = ARGBColorTableRow_X86; 1.1452 + } 1.1453 +#endif 1.1454 + for (y = 0; y < height; ++y) { 1.1455 + ARGBColorTableRow(dst, table_argb, width); 1.1456 + dst += dst_stride_argb; 1.1457 + } 1.1458 + return 0; 1.1459 +} 1.1460 + 1.1461 +// Apply a color table each ARGB pixel but preserve destination alpha. 1.1462 +// Table contains 256 ARGB values. 1.1463 +LIBYUV_API 1.1464 +int RGBColorTable(uint8* dst_argb, int dst_stride_argb, 1.1465 + const uint8* table_argb, 1.1466 + int dst_x, int dst_y, int width, int height) { 1.1467 + int y; 1.1468 + void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb, 1.1469 + int width) = RGBColorTableRow_C; 1.1470 + uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; 1.1471 + if (!dst_argb || !table_argb || width <= 0 || height <= 0 || 1.1472 + dst_x < 0 || dst_y < 0) { 1.1473 + return -1; 1.1474 + } 1.1475 + // Coalesce rows. 1.1476 + if (dst_stride_argb == width * 4) { 1.1477 + width *= height; 1.1478 + height = 1; 1.1479 + dst_stride_argb = 0; 1.1480 + } 1.1481 +#if defined(HAS_RGBCOLORTABLEROW_X86) 1.1482 + if (TestCpuFlag(kCpuHasX86)) { 1.1483 + RGBColorTableRow = RGBColorTableRow_X86; 1.1484 + } 1.1485 +#endif 1.1486 + for (y = 0; y < height; ++y) { 1.1487 + RGBColorTableRow(dst, table_argb, width); 1.1488 + dst += dst_stride_argb; 1.1489 + } 1.1490 + return 0; 1.1491 +} 1.1492 + 1.1493 +// ARGBQuantize is used to posterize art. 1.1494 +// e.g. rgb / qvalue * qvalue + qvalue / 2 1.1495 +// But the low levels implement efficiently with 3 parameters, and could be 1.1496 +// used for other high level operations. 1.1497 +// dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset; 1.1498 +// where scale is 1 / interval_size as a fixed point value. 
1.1499 +// The divide is replaces with a multiply by reciprocal fixed point multiply. 1.1500 +// Caveat - although SSE2 saturates, the C function does not and should be used 1.1501 +// with care if doing anything but quantization. 1.1502 +LIBYUV_API 1.1503 +int ARGBQuantize(uint8* dst_argb, int dst_stride_argb, 1.1504 + int scale, int interval_size, int interval_offset, 1.1505 + int dst_x, int dst_y, int width, int height) { 1.1506 + int y; 1.1507 + void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size, 1.1508 + int interval_offset, int width) = ARGBQuantizeRow_C; 1.1509 + uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; 1.1510 + if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 || 1.1511 + interval_size < 1 || interval_size > 255) { 1.1512 + return -1; 1.1513 + } 1.1514 + // Coalesce rows. 1.1515 + if (dst_stride_argb == width * 4) { 1.1516 + width *= height; 1.1517 + height = 1; 1.1518 + dst_stride_argb = 0; 1.1519 + } 1.1520 +#if defined(HAS_ARGBQUANTIZEROW_SSE2) 1.1521 + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) && 1.1522 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1.1523 + ARGBQuantizeRow = ARGBQuantizeRow_SSE2; 1.1524 + } 1.1525 +#elif defined(HAS_ARGBQUANTIZEROW_NEON) 1.1526 + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 1.1527 + ARGBQuantizeRow = ARGBQuantizeRow_NEON; 1.1528 + } 1.1529 +#endif 1.1530 + for (y = 0; y < height; ++y) { 1.1531 + ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width); 1.1532 + dst += dst_stride_argb; 1.1533 + } 1.1534 + return 0; 1.1535 +} 1.1536 + 1.1537 +// Computes table of cumulative sum for image where the value is the sum 1.1538 +// of all values above and to the left of the entry. Used by ARGBBlur. 
1.1539 +LIBYUV_API 1.1540 +int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb, 1.1541 + int32* dst_cumsum, int dst_stride32_cumsum, 1.1542 + int width, int height) { 1.1543 + int y; 1.1544 + void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum, 1.1545 + const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C; 1.1546 + int32* previous_cumsum = dst_cumsum; 1.1547 + if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) { 1.1548 + return -1; 1.1549 + } 1.1550 +#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2) 1.1551 + if (TestCpuFlag(kCpuHasSSE2)) { 1.1552 + ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; 1.1553 + } 1.1554 +#endif 1.1555 + memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4); // 4 int per pixel. 1.1556 + for (y = 0; y < height; ++y) { 1.1557 + ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width); 1.1558 + previous_cumsum = dst_cumsum; 1.1559 + dst_cumsum += dst_stride32_cumsum; 1.1560 + src_argb += src_stride_argb; 1.1561 + } 1.1562 + return 0; 1.1563 +} 1.1564 + 1.1565 +// Blur ARGB image. 1.1566 +// Caller should allocate CumulativeSum table of width * height * 16 bytes 1.1567 +// aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory 1.1568 +// as the buffer is treated as circular. 
1.1569 +LIBYUV_API 1.1570 +int ARGBBlur(const uint8* src_argb, int src_stride_argb, 1.1571 + uint8* dst_argb, int dst_stride_argb, 1.1572 + int32* dst_cumsum, int dst_stride32_cumsum, 1.1573 + int width, int height, int radius) { 1.1574 + int y; 1.1575 + void (*ComputeCumulativeSumRow)(const uint8 *row, int32 *cumsum, 1.1576 + const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C; 1.1577 + void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft, 1.1578 + int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C; 1.1579 + int32* cumsum_bot_row; 1.1580 + int32* max_cumsum_bot_row; 1.1581 + int32* cumsum_top_row; 1.1582 + 1.1583 + if (!src_argb || !dst_argb || width <= 0 || height == 0) { 1.1584 + return -1; 1.1585 + } 1.1586 + if (height < 0) { 1.1587 + height = -height; 1.1588 + src_argb = src_argb + (height - 1) * src_stride_argb; 1.1589 + src_stride_argb = -src_stride_argb; 1.1590 + } 1.1591 + if (radius > height) { 1.1592 + radius = height; 1.1593 + } 1.1594 + if (radius > (width / 2 - 1)) { 1.1595 + radius = width / 2 - 1; 1.1596 + } 1.1597 + if (radius <= 0) { 1.1598 + return -1; 1.1599 + } 1.1600 +#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2) 1.1601 + if (TestCpuFlag(kCpuHasSSE2)) { 1.1602 + ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; 1.1603 + CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2; 1.1604 + } 1.1605 +#endif 1.1606 + // Compute enough CumulativeSum for first row to be blurred. After this 1.1607 + // one row of CumulativeSum is updated at a time. 
1.1608 + ARGBComputeCumulativeSum(src_argb, src_stride_argb, 1.1609 + dst_cumsum, dst_stride32_cumsum, 1.1610 + width, radius); 1.1611 + 1.1612 + src_argb = src_argb + radius * src_stride_argb; 1.1613 + cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum]; 1.1614 + 1.1615 + max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum]; 1.1616 + cumsum_top_row = &dst_cumsum[0]; 1.1617 + 1.1618 + for (y = 0; y < height; ++y) { 1.1619 + int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0; 1.1620 + int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1); 1.1621 + int area = radius * (bot_y - top_y); 1.1622 + int boxwidth = radius * 4; 1.1623 + int x; 1.1624 + int n; 1.1625 + 1.1626 + // Increment cumsum_top_row pointer with circular buffer wrap around. 1.1627 + if (top_y) { 1.1628 + cumsum_top_row += dst_stride32_cumsum; 1.1629 + if (cumsum_top_row >= max_cumsum_bot_row) { 1.1630 + cumsum_top_row = dst_cumsum; 1.1631 + } 1.1632 + } 1.1633 + // Increment cumsum_bot_row pointer with circular buffer wrap around and 1.1634 + // then fill in a row of CumulativeSum. 1.1635 + if ((y + radius) < height) { 1.1636 + const int32* prev_cumsum_bot_row = cumsum_bot_row; 1.1637 + cumsum_bot_row += dst_stride32_cumsum; 1.1638 + if (cumsum_bot_row >= max_cumsum_bot_row) { 1.1639 + cumsum_bot_row = dst_cumsum; 1.1640 + } 1.1641 + ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row, 1.1642 + width); 1.1643 + src_argb += src_stride_argb; 1.1644 + } 1.1645 + 1.1646 + // Left clipped. 1.1647 + for (x = 0; x < radius + 1; ++x) { 1.1648 + CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, 1.1649 + boxwidth, area, &dst_argb[x * 4], 1); 1.1650 + area += (bot_y - top_y); 1.1651 + boxwidth += 4; 1.1652 + } 1.1653 + 1.1654 + // Middle unclipped. 
1.1655 + n = (width - 1) - radius - x + 1; 1.1656 + CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, 1.1657 + boxwidth, area, &dst_argb[x * 4], n); 1.1658 + 1.1659 + // Right clipped. 1.1660 + for (x += n; x <= width - 1; ++x) { 1.1661 + area -= (bot_y - top_y); 1.1662 + boxwidth -= 4; 1.1663 + CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4, 1.1664 + cumsum_bot_row + (x - radius - 1) * 4, 1.1665 + boxwidth, area, &dst_argb[x * 4], 1); 1.1666 + } 1.1667 + dst_argb += dst_stride_argb; 1.1668 + } 1.1669 + return 0; 1.1670 +} 1.1671 + 1.1672 +// Multiply ARGB image by a specified ARGB value. 1.1673 +LIBYUV_API 1.1674 +int ARGBShade(const uint8* src_argb, int src_stride_argb, 1.1675 + uint8* dst_argb, int dst_stride_argb, 1.1676 + int width, int height, uint32 value) { 1.1677 + int y; 1.1678 + void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb, 1.1679 + int width, uint32 value) = ARGBShadeRow_C; 1.1680 + if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) { 1.1681 + return -1; 1.1682 + } 1.1683 + if (height < 0) { 1.1684 + height = -height; 1.1685 + src_argb = src_argb + (height - 1) * src_stride_argb; 1.1686 + src_stride_argb = -src_stride_argb; 1.1687 + } 1.1688 + // Coalesce rows. 
1.1689 + if (src_stride_argb == width * 4 && 1.1690 + dst_stride_argb == width * 4) { 1.1691 + width *= height; 1.1692 + height = 1; 1.1693 + src_stride_argb = dst_stride_argb = 0; 1.1694 + } 1.1695 +#if defined(HAS_ARGBSHADEROW_SSE2) 1.1696 + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) && 1.1697 + IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && 1.1698 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1.1699 + ARGBShadeRow = ARGBShadeRow_SSE2; 1.1700 + } 1.1701 +#elif defined(HAS_ARGBSHADEROW_NEON) 1.1702 + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 1.1703 + ARGBShadeRow = ARGBShadeRow_NEON; 1.1704 + } 1.1705 +#endif 1.1706 + 1.1707 + for (y = 0; y < height; ++y) { 1.1708 + ARGBShadeRow(src_argb, dst_argb, width, value); 1.1709 + src_argb += src_stride_argb; 1.1710 + dst_argb += dst_stride_argb; 1.1711 + } 1.1712 + return 0; 1.1713 +} 1.1714 + 1.1715 +// Interpolate 2 ARGB images by specified amount (0 to 255). 1.1716 +LIBYUV_API 1.1717 +int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0, 1.1718 + const uint8* src_argb1, int src_stride_argb1, 1.1719 + uint8* dst_argb, int dst_stride_argb, 1.1720 + int width, int height, int interpolation) { 1.1721 + int y; 1.1722 + void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, 1.1723 + ptrdiff_t src_stride, int dst_width, 1.1724 + int source_y_fraction) = InterpolateRow_C; 1.1725 + if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { 1.1726 + return -1; 1.1727 + } 1.1728 + // Negative height means invert the image. 1.1729 + if (height < 0) { 1.1730 + height = -height; 1.1731 + dst_argb = dst_argb + (height - 1) * dst_stride_argb; 1.1732 + dst_stride_argb = -dst_stride_argb; 1.1733 + } 1.1734 + // Coalesce rows. 
1.1735 + if (src_stride_argb0 == width * 4 && 1.1736 + src_stride_argb1 == width * 4 && 1.1737 + dst_stride_argb == width * 4) { 1.1738 + width *= height; 1.1739 + height = 1; 1.1740 + src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; 1.1741 + } 1.1742 +#if defined(HAS_INTERPOLATEROW_SSE2) 1.1743 + if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { 1.1744 + InterpolateRow = InterpolateRow_Any_SSE2; 1.1745 + if (IS_ALIGNED(width, 4)) { 1.1746 + InterpolateRow = InterpolateRow_Unaligned_SSE2; 1.1747 + if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) && 1.1748 + IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) && 1.1749 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1.1750 + InterpolateRow = InterpolateRow_SSE2; 1.1751 + } 1.1752 + } 1.1753 + } 1.1754 +#endif 1.1755 +#if defined(HAS_INTERPOLATEROW_SSSE3) 1.1756 + if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) { 1.1757 + InterpolateRow = InterpolateRow_Any_SSSE3; 1.1758 + if (IS_ALIGNED(width, 4)) { 1.1759 + InterpolateRow = InterpolateRow_Unaligned_SSSE3; 1.1760 + if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) && 1.1761 + IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) && 1.1762 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1.1763 + InterpolateRow = InterpolateRow_SSSE3; 1.1764 + } 1.1765 + } 1.1766 + } 1.1767 +#endif 1.1768 +#if defined(HAS_INTERPOLATEROW_AVX2) 1.1769 + if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { 1.1770 + InterpolateRow = InterpolateRow_Any_AVX2; 1.1771 + if (IS_ALIGNED(width, 8)) { 1.1772 + InterpolateRow = InterpolateRow_AVX2; 1.1773 + } 1.1774 + } 1.1775 +#endif 1.1776 +#if defined(HAS_INTERPOLATEROW_NEON) 1.1777 + if (TestCpuFlag(kCpuHasNEON) && width >= 4) { 1.1778 + InterpolateRow = InterpolateRow_Any_NEON; 1.1779 + if (IS_ALIGNED(width, 4)) { 1.1780 + InterpolateRow = InterpolateRow_NEON; 1.1781 + } 1.1782 + } 1.1783 +#endif 1.1784 +#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) 1.1785 + if 
(TestCpuFlag(kCpuHasMIPS_DSPR2) && width >= 1 && 1.1786 + IS_ALIGNED(src_argb0, 4) && IS_ALIGNED(src_stride_argb0, 4) && 1.1787 + IS_ALIGNED(src_argb1, 4) && IS_ALIGNED(src_stride_argb1, 4) && 1.1788 + IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { 1.1789 + ScaleARGBFilterRows = InterpolateRow_MIPS_DSPR2; 1.1790 + } 1.1791 +#endif 1.1792 + 1.1793 + for (y = 0; y < height; ++y) { 1.1794 + InterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0, 1.1795 + width * 4, interpolation); 1.1796 + src_argb0 += src_stride_argb0; 1.1797 + src_argb1 += src_stride_argb1; 1.1798 + dst_argb += dst_stride_argb; 1.1799 + } 1.1800 + return 0; 1.1801 +} 1.1802 + 1.1803 +// Shuffle ARGB channel order. e.g. BGRA to ARGB. 1.1804 +LIBYUV_API 1.1805 +int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra, 1.1806 + uint8* dst_argb, int dst_stride_argb, 1.1807 + const uint8* shuffler, int width, int height) { 1.1808 + int y; 1.1809 + void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb, 1.1810 + const uint8* shuffler, int pix) = ARGBShuffleRow_C; 1.1811 + if (!src_bgra || !dst_argb || 1.1812 + width <= 0 || height == 0) { 1.1813 + return -1; 1.1814 + } 1.1815 + // Negative height means invert the image. 1.1816 + if (height < 0) { 1.1817 + height = -height; 1.1818 + src_bgra = src_bgra + (height - 1) * src_stride_bgra; 1.1819 + src_stride_bgra = -src_stride_bgra; 1.1820 + } 1.1821 + // Coalesce rows. 
1.1822 + if (src_stride_bgra == width * 4 && 1.1823 + dst_stride_argb == width * 4) { 1.1824 + width *= height; 1.1825 + height = 1; 1.1826 + src_stride_bgra = dst_stride_argb = 0; 1.1827 + } 1.1828 +#if defined(HAS_ARGBSHUFFLEROW_SSE2) 1.1829 + if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { 1.1830 + ARGBShuffleRow = ARGBShuffleRow_Any_SSE2; 1.1831 + if (IS_ALIGNED(width, 4)) { 1.1832 + ARGBShuffleRow = ARGBShuffleRow_SSE2; 1.1833 + } 1.1834 + } 1.1835 +#endif 1.1836 +#if defined(HAS_ARGBSHUFFLEROW_SSSE3) 1.1837 + if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { 1.1838 + ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3; 1.1839 + if (IS_ALIGNED(width, 8)) { 1.1840 + ARGBShuffleRow = ARGBShuffleRow_Unaligned_SSSE3; 1.1841 + if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16) && 1.1842 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1.1843 + ARGBShuffleRow = ARGBShuffleRow_SSSE3; 1.1844 + } 1.1845 + } 1.1846 + } 1.1847 +#endif 1.1848 +#if defined(HAS_ARGBSHUFFLEROW_AVX2) 1.1849 + if (TestCpuFlag(kCpuHasAVX2) && width >= 16) { 1.1850 + ARGBShuffleRow = ARGBShuffleRow_Any_AVX2; 1.1851 + if (IS_ALIGNED(width, 16)) { 1.1852 + ARGBShuffleRow = ARGBShuffleRow_AVX2; 1.1853 + } 1.1854 + } 1.1855 +#endif 1.1856 +#if defined(HAS_ARGBSHUFFLEROW_NEON) 1.1857 + if (TestCpuFlag(kCpuHasNEON) && width >= 4) { 1.1858 + ARGBShuffleRow = ARGBShuffleRow_Any_NEON; 1.1859 + if (IS_ALIGNED(width, 4)) { 1.1860 + ARGBShuffleRow = ARGBShuffleRow_NEON; 1.1861 + } 1.1862 + } 1.1863 +#endif 1.1864 + 1.1865 + for (y = 0; y < height; ++y) { 1.1866 + ARGBShuffleRow(src_bgra, dst_argb, shuffler, width); 1.1867 + src_bgra += src_stride_bgra; 1.1868 + dst_argb += dst_stride_argb; 1.1869 + } 1.1870 + return 0; 1.1871 +} 1.1872 + 1.1873 +// Sobel ARGB effect. 
1.1874 +static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, 1.1875 + uint8* dst_argb, int dst_stride_argb, 1.1876 + int width, int height, 1.1877 + void (*SobelRow)(const uint8* src_sobelx, 1.1878 + const uint8* src_sobely, 1.1879 + uint8* dst, int width)) { 1.1880 + int y; 1.1881 + void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer, 1.1882 + uint32 selector, int pix) = ARGBToBayerGGRow_C; 1.1883 + void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1, 1.1884 + uint8* dst_sobely, int width) = SobelYRow_C; 1.1885 + void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1, 1.1886 + const uint8* src_y2, uint8* dst_sobely, int width) = 1.1887 + SobelXRow_C; 1.1888 + const int kEdge = 16; // Extra pixels at start of row for extrude/align. 1.1889 + if (!src_argb || !dst_argb || width <= 0 || height == 0) { 1.1890 + return -1; 1.1891 + } 1.1892 + // Negative height means invert the image. 1.1893 + if (height < 0) { 1.1894 + height = -height; 1.1895 + src_argb = src_argb + (height - 1) * src_stride_argb; 1.1896 + src_stride_argb = -src_stride_argb; 1.1897 + } 1.1898 + // ARGBToBayer used to select G channel from ARGB. 
1.1899 +#if defined(HAS_ARGBTOBAYERGGROW_SSE2) 1.1900 + if (TestCpuFlag(kCpuHasSSE2) && width >= 8 && 1.1901 + IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { 1.1902 + ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2; 1.1903 + if (IS_ALIGNED(width, 8)) { 1.1904 + ARGBToBayerRow = ARGBToBayerGGRow_SSE2; 1.1905 + } 1.1906 + } 1.1907 +#endif 1.1908 +#if defined(HAS_ARGBTOBAYERROW_SSSE3) 1.1909 + if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 && 1.1910 + IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { 1.1911 + ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3; 1.1912 + if (IS_ALIGNED(width, 8)) { 1.1913 + ARGBToBayerRow = ARGBToBayerRow_SSSE3; 1.1914 + } 1.1915 + } 1.1916 +#endif 1.1917 +#if defined(HAS_ARGBTOBAYERGGROW_NEON) 1.1918 + if (TestCpuFlag(kCpuHasNEON) && width >= 8) { 1.1919 + ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON; 1.1920 + if (IS_ALIGNED(width, 8)) { 1.1921 + ARGBToBayerRow = ARGBToBayerGGRow_NEON; 1.1922 + } 1.1923 + } 1.1924 +#endif 1.1925 +#if defined(HAS_SOBELYROW_SSE2) 1.1926 + if (TestCpuFlag(kCpuHasSSE2)) { 1.1927 + SobelYRow = SobelYRow_SSE2; 1.1928 + } 1.1929 +#endif 1.1930 +#if defined(HAS_SOBELYROW_NEON) 1.1931 + if (TestCpuFlag(kCpuHasNEON)) { 1.1932 + SobelYRow = SobelYRow_NEON; 1.1933 + } 1.1934 +#endif 1.1935 +#if defined(HAS_SOBELXROW_SSE2) 1.1936 + if (TestCpuFlag(kCpuHasSSE2)) { 1.1937 + SobelXRow = SobelXRow_SSE2; 1.1938 + } 1.1939 +#endif 1.1940 +#if defined(HAS_SOBELXROW_NEON) 1.1941 + if (TestCpuFlag(kCpuHasNEON)) { 1.1942 + SobelXRow = SobelXRow_NEON; 1.1943 + } 1.1944 +#endif 1.1945 + { 1.1946 + // 3 rows with edges before/after. 1.1947 + const int kRowSize = (width + kEdge + 15) & ~15; 1.1948 + align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge)); 1.1949 + uint8* row_sobelx = rows; 1.1950 + uint8* row_sobely = rows + kRowSize; 1.1951 + uint8* row_y = rows + kRowSize * 2; 1.1952 + 1.1953 + // Convert first row. 
1.1954 + uint8* row_y0 = row_y + kEdge; 1.1955 + uint8* row_y1 = row_y0 + kRowSize; 1.1956 + uint8* row_y2 = row_y1 + kRowSize; 1.1957 + ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width); 1.1958 + row_y0[-1] = row_y0[0]; 1.1959 + memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind. 1.1960 + ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width); 1.1961 + row_y1[-1] = row_y1[0]; 1.1962 + memset(row_y1 + width, row_y1[width - 1], 16); 1.1963 + memset(row_y2 + width, 0, 16); 1.1964 + 1.1965 + for (y = 0; y < height; ++y) { 1.1966 + // Convert next row of ARGB to Y. 1.1967 + if (y < (height - 1)) { 1.1968 + src_argb += src_stride_argb; 1.1969 + } 1.1970 + ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width); 1.1971 + row_y2[-1] = row_y2[0]; 1.1972 + row_y2[width] = row_y2[width - 1]; 1.1973 + 1.1974 + SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width); 1.1975 + SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width); 1.1976 + SobelRow(row_sobelx, row_sobely, dst_argb, width); 1.1977 + 1.1978 + // Cycle thru circular queue of 3 row_y buffers. 1.1979 + { 1.1980 + uint8* row_yt = row_y0; 1.1981 + row_y0 = row_y1; 1.1982 + row_y1 = row_y2; 1.1983 + row_y2 = row_yt; 1.1984 + } 1.1985 + 1.1986 + dst_argb += dst_stride_argb; 1.1987 + } 1.1988 + free_aligned_buffer_64(rows); 1.1989 + } 1.1990 + return 0; 1.1991 +} 1.1992 + 1.1993 +// Sobel ARGB effect. 
1.1994 +LIBYUV_API 1.1995 +int ARGBSobel(const uint8* src_argb, int src_stride_argb, 1.1996 + uint8* dst_argb, int dst_stride_argb, 1.1997 + int width, int height) { 1.1998 + void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely, 1.1999 + uint8* dst_argb, int width) = SobelRow_C; 1.2000 +#if defined(HAS_SOBELROW_SSE2) 1.2001 + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && 1.2002 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1.2003 + SobelRow = SobelRow_SSE2; 1.2004 + } 1.2005 +#endif 1.2006 +#if defined(HAS_SOBELROW_NEON) 1.2007 + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 1.2008 + SobelRow = SobelRow_NEON; 1.2009 + } 1.2010 +#endif 1.2011 + return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, 1.2012 + width, height, SobelRow); 1.2013 +} 1.2014 + 1.2015 +// Sobel ARGB effect with planar output. 1.2016 +LIBYUV_API 1.2017 +int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb, 1.2018 + uint8* dst_y, int dst_stride_y, 1.2019 + int width, int height) { 1.2020 + void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely, 1.2021 + uint8* dst_, int width) = SobelToPlaneRow_C; 1.2022 +#if defined(HAS_SOBELTOPLANEROW_SSE2) 1.2023 + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && 1.2024 + IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { 1.2025 + SobelToPlaneRow = SobelToPlaneRow_SSE2; 1.2026 + } 1.2027 +#endif 1.2028 +#if defined(HAS_SOBELTOPLANEROW_NEON) 1.2029 + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { 1.2030 + SobelToPlaneRow = SobelToPlaneRow_NEON; 1.2031 + } 1.2032 +#endif 1.2033 + return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, 1.2034 + width, height, SobelToPlaneRow); 1.2035 +} 1.2036 + 1.2037 +// SobelXY ARGB effect. 1.2038 +// Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel. 
1.2039 +LIBYUV_API 1.2040 +int ARGBSobelXY(const uint8* src_argb, int src_stride_argb, 1.2041 + uint8* dst_argb, int dst_stride_argb, 1.2042 + int width, int height) { 1.2043 + void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely, 1.2044 + uint8* dst_argb, int width) = SobelXYRow_C; 1.2045 +#if defined(HAS_SOBELXYROW_SSE2) 1.2046 + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && 1.2047 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1.2048 + SobelXYRow = SobelXYRow_SSE2; 1.2049 + } 1.2050 +#endif 1.2051 +#if defined(HAS_SOBELXYROW_NEON) 1.2052 + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { 1.2053 + SobelXYRow = SobelXYRow_NEON; 1.2054 + } 1.2055 +#endif 1.2056 + return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, 1.2057 + width, height, SobelXYRow); 1.2058 +} 1.2059 + 1.2060 +// Apply a 4x4 polynomial to each ARGB pixel. 1.2061 +LIBYUV_API 1.2062 +int ARGBPolynomial(const uint8* src_argb, int src_stride_argb, 1.2063 + uint8* dst_argb, int dst_stride_argb, 1.2064 + const float* poly, 1.2065 + int width, int height) { 1.2066 + int y; 1.2067 + void (*ARGBPolynomialRow)(const uint8* src_argb, 1.2068 + uint8* dst_argb, const float* poly, 1.2069 + int width) = ARGBPolynomialRow_C; 1.2070 + if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) { 1.2071 + return -1; 1.2072 + } 1.2073 + // Negative height means invert the image. 1.2074 + if (height < 0) { 1.2075 + height = -height; 1.2076 + src_argb = src_argb + (height - 1) * src_stride_argb; 1.2077 + src_stride_argb = -src_stride_argb; 1.2078 + } 1.2079 + // Coalesce rows. 
1.2080 + if (src_stride_argb == width * 4 && 1.2081 + dst_stride_argb == width * 4) { 1.2082 + width *= height; 1.2083 + height = 1; 1.2084 + src_stride_argb = dst_stride_argb = 0; 1.2085 + } 1.2086 +#if defined(HAS_ARGBPOLYNOMIALROW_SSE2) 1.2087 + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) { 1.2088 + ARGBPolynomialRow = ARGBPolynomialRow_SSE2; 1.2089 + } 1.2090 +#endif 1.2091 +#if defined(HAS_ARGBPOLYNOMIALROW_AVX2) 1.2092 + if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) && 1.2093 + IS_ALIGNED(width, 2)) { 1.2094 + ARGBPolynomialRow = ARGBPolynomialRow_AVX2; 1.2095 + } 1.2096 +#endif 1.2097 + 1.2098 + for (y = 0; y < height; ++y) { 1.2099 + ARGBPolynomialRow(src_argb, dst_argb, poly, width); 1.2100 + src_argb += src_stride_argb; 1.2101 + dst_argb += dst_stride_argb; 1.2102 + } 1.2103 + return 0; 1.2104 +} 1.2105 + 1.2106 +// Apply a lumacolortable to each ARGB pixel. 1.2107 +LIBYUV_API 1.2108 +int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb, 1.2109 + uint8* dst_argb, int dst_stride_argb, 1.2110 + const uint8* luma, 1.2111 + int width, int height) { 1.2112 + int y; 1.2113 + void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb, 1.2114 + int width, const uint8* luma, const uint32 lumacoeff) = 1.2115 + ARGBLumaColorTableRow_C; 1.2116 + if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) { 1.2117 + return -1; 1.2118 + } 1.2119 + // Negative height means invert the image. 1.2120 + if (height < 0) { 1.2121 + height = -height; 1.2122 + src_argb = src_argb + (height - 1) * src_stride_argb; 1.2123 + src_stride_argb = -src_stride_argb; 1.2124 + } 1.2125 + // Coalesce rows. 
1.2126 + if (src_stride_argb == width * 4 && 1.2127 + dst_stride_argb == width * 4) { 1.2128 + width *= height; 1.2129 + height = 1; 1.2130 + src_stride_argb = dst_stride_argb = 0; 1.2131 + } 1.2132 +#if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3) 1.2133 + if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) { 1.2134 + ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3; 1.2135 + } 1.2136 +#endif 1.2137 + 1.2138 + for (y = 0; y < height; ++y) { 1.2139 + ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f); 1.2140 + src_argb += src_stride_argb; 1.2141 + dst_argb += dst_stride_argb; 1.2142 + } 1.2143 + return 0; 1.2144 +} 1.2145 + 1.2146 +// Copy Alpha from one ARGB image to another. 1.2147 +LIBYUV_API 1.2148 +int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, 1.2149 + uint8* dst_argb, int dst_stride_argb, 1.2150 + int width, int height) { 1.2151 + int y; 1.2152 + void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) = 1.2153 + ARGBCopyAlphaRow_C; 1.2154 + if (!src_argb || !dst_argb || width <= 0 || height == 0) { 1.2155 + return -1; 1.2156 + } 1.2157 + // Negative height means invert the image. 1.2158 + if (height < 0) { 1.2159 + height = -height; 1.2160 + src_argb = src_argb + (height - 1) * src_stride_argb; 1.2161 + src_stride_argb = -src_stride_argb; 1.2162 + } 1.2163 + // Coalesce rows. 
1.2164 + if (src_stride_argb == width * 4 && 1.2165 + dst_stride_argb == width * 4) { 1.2166 + width *= height; 1.2167 + height = 1; 1.2168 + src_stride_argb = dst_stride_argb = 0; 1.2169 + } 1.2170 +#if defined(HAS_ARGBCOPYALPHAROW_SSE2) 1.2171 + if (TestCpuFlag(kCpuHasSSE2) && 1.2172 + IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && 1.2173 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) && 1.2174 + IS_ALIGNED(width, 8)) { 1.2175 + ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2; 1.2176 + } 1.2177 +#endif 1.2178 +#if defined(HAS_ARGBCOPYALPHAROW_AVX2) 1.2179 + if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) { 1.2180 + ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2; 1.2181 + } 1.2182 +#endif 1.2183 + 1.2184 + for (y = 0; y < height; ++y) { 1.2185 + ARGBCopyAlphaRow(src_argb, dst_argb, width); 1.2186 + src_argb += src_stride_argb; 1.2187 + dst_argb += dst_stride_argb; 1.2188 + } 1.2189 + return 0; 1.2190 +} 1.2191 + 1.2192 +// Copy a planar Y channel to the alpha channel of a destination ARGB image. 1.2193 +LIBYUV_API 1.2194 +int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, 1.2195 + uint8* dst_argb, int dst_stride_argb, 1.2196 + int width, int height) { 1.2197 + int y; 1.2198 + void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) = 1.2199 + ARGBCopyYToAlphaRow_C; 1.2200 + if (!src_y || !dst_argb || width <= 0 || height == 0) { 1.2201 + return -1; 1.2202 + } 1.2203 + // Negative height means invert the image. 1.2204 + if (height < 0) { 1.2205 + height = -height; 1.2206 + src_y = src_y + (height - 1) * src_stride_y; 1.2207 + src_stride_y = -src_stride_y; 1.2208 + } 1.2209 + // Coalesce rows. 
1.2210 + if (src_stride_y == width && 1.2211 + dst_stride_argb == width * 4) { 1.2212 + width *= height; 1.2213 + height = 1; 1.2214 + src_stride_y = dst_stride_argb = 0; 1.2215 + } 1.2216 +#if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2) 1.2217 + if (TestCpuFlag(kCpuHasSSE2) && 1.2218 + IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && 1.2219 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) && 1.2220 + IS_ALIGNED(width, 8)) { 1.2221 + ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2; 1.2222 + } 1.2223 +#endif 1.2224 +#if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2) 1.2225 + if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) { 1.2226 + ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2; 1.2227 + } 1.2228 +#endif 1.2229 + 1.2230 + for (y = 0; y < height; ++y) { 1.2231 + ARGBCopyYToAlphaRow(src_y, dst_argb, width); 1.2232 + src_y += src_stride_y; 1.2233 + dst_argb += dst_stride_argb; 1.2234 + } 1.2235 + return 0; 1.2236 +} 1.2237 + 1.2238 +#ifdef __cplusplus 1.2239 +} // extern "C" 1.2240 +} // namespace libyuv 1.2241 +#endif