media/libyuv/source/planar_functions.cc

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/media/libyuv/source/planar_functions.cc	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,2238 @@
     1.4 +/*
     1.5 + *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
     1.6 + *
     1.7 + *  Use of this source code is governed by a BSD-style license
     1.8 + *  that can be found in the LICENSE file in the root of the source
     1.9 + *  tree. An additional intellectual property rights grant can be found
    1.10 + *  in the file PATENTS. All contributing project authors may
    1.11 + *  be found in the AUTHORS file in the root of the source tree.
    1.12 + */
    1.13 +
    1.14 +#include "libyuv/planar_functions.h"
    1.15 +
    1.16 +#include <string.h>  // for memset()
    1.17 +
    1.18 +#include "libyuv/cpu_id.h"
    1.19 +#ifdef HAVE_JPEG
    1.20 +#include "libyuv/mjpeg_decoder.h"
    1.21 +#endif
    1.22 +#include "libyuv/row.h"
    1.23 +
    1.24 +#ifdef __cplusplus
    1.25 +namespace libyuv {
    1.26 +extern "C" {
    1.27 +#endif
    1.28 +
    1.29 +// Copy a plane of data
    1.30 +LIBYUV_API
    1.31 +void CopyPlane(const uint8* src_y, int src_stride_y,
    1.32 +               uint8* dst_y, int dst_stride_y,
    1.33 +               int width, int height) {
    1.34 +  int y;
    1.35 +  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
    1.36 +  // Coalesce rows.
    1.37 +  if (src_stride_y == width &&
    1.38 +      dst_stride_y == width) {
    1.39 +    width *= height;
    1.40 +    height = 1;
    1.41 +    src_stride_y = dst_stride_y = 0;
    1.42 +  }
    1.43 +#if defined(HAS_COPYROW_X86)
    1.44 +  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
    1.45 +    CopyRow = CopyRow_X86;
    1.46 +  }
    1.47 +#endif
    1.48 +#if defined(HAS_COPYROW_SSE2)
    1.49 +  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
    1.50 +      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
    1.51 +      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
    1.52 +    CopyRow = CopyRow_SSE2;
    1.53 +  }
    1.54 +#endif
    1.55 +#if defined(HAS_COPYROW_ERMS)
    1.56 +  if (TestCpuFlag(kCpuHasERMS)) {
    1.57 +    CopyRow = CopyRow_ERMS;
    1.58 +  }
    1.59 +#endif
    1.60 +#if defined(HAS_COPYROW_NEON)
    1.61 +  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
    1.62 +    CopyRow = CopyRow_NEON;
    1.63 +  }
    1.64 +#endif
    1.65 +#if defined(HAS_COPYROW_MIPS)
    1.66 +  if (TestCpuFlag(kCpuHasMIPS)) {
    1.67 +    CopyRow = CopyRow_MIPS;
    1.68 +  }
    1.69 +#endif
    1.70 +
    1.71 +  // Copy plane
    1.72 +  for (y = 0; y < height; ++y) {
    1.73 +    CopyRow(src_y, dst_y, width);
    1.74 +    src_y += src_stride_y;
    1.75 +    dst_y += dst_stride_y;
    1.76 +  }
    1.77 +}
    1.78 +
    1.79 +// Copy I422.
    1.80 +LIBYUV_API
    1.81 +int I422Copy(const uint8* src_y, int src_stride_y,
    1.82 +             const uint8* src_u, int src_stride_u,
    1.83 +             const uint8* src_v, int src_stride_v,
    1.84 +             uint8* dst_y, int dst_stride_y,
    1.85 +             uint8* dst_u, int dst_stride_u,
    1.86 +             uint8* dst_v, int dst_stride_v,
    1.87 +             int width, int height) {
    1.88 +  int halfwidth = (width + 1) >> 1;
    1.89 +  if (!src_y || !src_u || !src_v ||
    1.90 +      !dst_y || !dst_u || !dst_v ||
    1.91 +      width <= 0 || height == 0) {
    1.92 +    return -1;
    1.93 +  }
    1.94 +  // Negative height means invert the image.
    1.95 +  if (height < 0) {
    1.96 +    height = -height;
    1.97 +    src_y = src_y + (height - 1) * src_stride_y;
    1.98 +    src_u = src_u + (height - 1) * src_stride_u;
    1.99 +    src_v = src_v + (height - 1) * src_stride_v;
   1.100 +    src_stride_y = -src_stride_y;
   1.101 +    src_stride_u = -src_stride_u;
   1.102 +    src_stride_v = -src_stride_v;
   1.103 +  }
   1.104 +  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
   1.105 +  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
   1.106 +  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
   1.107 +  return 0;
   1.108 +}
   1.109 +
   1.110 +// Copy I444.
   1.111 +LIBYUV_API
   1.112 +int I444Copy(const uint8* src_y, int src_stride_y,
   1.113 +             const uint8* src_u, int src_stride_u,
   1.114 +             const uint8* src_v, int src_stride_v,
   1.115 +             uint8* dst_y, int dst_stride_y,
   1.116 +             uint8* dst_u, int dst_stride_u,
   1.117 +             uint8* dst_v, int dst_stride_v,
   1.118 +             int width, int height) {
   1.119 +  if (!src_y || !src_u || !src_v ||
   1.120 +      !dst_y || !dst_u || !dst_v ||
   1.121 +      width <= 0 || height == 0) {
   1.122 +    return -1;
   1.123 +  }
   1.124 +  // Negative height means invert the image.
   1.125 +  if (height < 0) {
   1.126 +    height = -height;
   1.127 +    src_y = src_y + (height - 1) * src_stride_y;
   1.128 +    src_u = src_u + (height - 1) * src_stride_u;
   1.129 +    src_v = src_v + (height - 1) * src_stride_v;
   1.130 +    src_stride_y = -src_stride_y;
   1.131 +    src_stride_u = -src_stride_u;
   1.132 +    src_stride_v = -src_stride_v;
   1.133 +  }
   1.134 +
   1.135 +  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
   1.136 +  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
   1.137 +  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
   1.138 +  return 0;
   1.139 +}
   1.140 +
   1.141 +// Copy I400.
   1.142 +LIBYUV_API
   1.143 +int I400ToI400(const uint8* src_y, int src_stride_y,
   1.144 +               uint8* dst_y, int dst_stride_y,
   1.145 +               int width, int height) {
   1.146 +  if (!src_y || !dst_y || width <= 0 || height == 0) {
   1.147 +    return -1;
   1.148 +  }
   1.149 +  // Negative height means invert the image.
   1.150 +  if (height < 0) {
   1.151 +    height = -height;
   1.152 +    src_y = src_y + (height - 1) * src_stride_y;
   1.153 +    src_stride_y = -src_stride_y;
   1.154 +  }
   1.155 +  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
   1.156 +  return 0;
   1.157 +}
   1.158 +
   1.159 +// Convert I420 to I400.
   1.160 +LIBYUV_API
   1.161 +int I420ToI400(const uint8* src_y, int src_stride_y,
   1.162 +               const uint8* src_u, int src_stride_u,
   1.163 +               const uint8* src_v, int src_stride_v,
   1.164 +               uint8* dst_y, int dst_stride_y,
   1.165 +               int width, int height) {
   1.166 +  if (!src_y || !dst_y || width <= 0 || height == 0) {
   1.167 +    return -1;
   1.168 +  }
   1.169 +  // Negative height means invert the image.
   1.170 +  if (height < 0) {
   1.171 +    height = -height;
   1.172 +    src_y = src_y + (height - 1) * src_stride_y;
   1.173 +    src_stride_y = -src_stride_y;
   1.174 +  }
   1.175 +  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
   1.176 +  return 0;
   1.177 +}
   1.178 +
   1.179 +// Mirror a plane of data.
   1.180 +void MirrorPlane(const uint8* src_y, int src_stride_y,
   1.181 +                 uint8* dst_y, int dst_stride_y,
   1.182 +                 int width, int height) {
   1.183 +  int y;
   1.184 +  void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
   1.185 +  // Negative height means invert the image.
   1.186 +  if (height < 0) {
   1.187 +    height = -height;
   1.188 +    src_y = src_y + (height - 1) * src_stride_y;
   1.189 +    src_stride_y = -src_stride_y;
   1.190 +  }
   1.191 +#if defined(HAS_MIRRORROW_NEON)
   1.192 +  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
   1.193 +    MirrorRow = MirrorRow_NEON;
   1.194 +  }
   1.195 +#endif
   1.196 +#if defined(HAS_MIRRORROW_SSE2)
   1.197 +  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
   1.198 +    MirrorRow = MirrorRow_SSE2;
   1.199 +  }
   1.200 +#endif
   1.201 +#if defined(HAS_MIRRORROW_SSSE3)
   1.202 +  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
   1.203 +      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
   1.204 +      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
   1.205 +    MirrorRow = MirrorRow_SSSE3;
   1.206 +  }
   1.207 +#endif
   1.208 +#if defined(HAS_MIRRORROW_AVX2)
   1.209 +  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
   1.210 +    MirrorRow = MirrorRow_AVX2;
   1.211 +  }
   1.212 +#endif
   1.213 +
   1.214 +  // Mirror plane
   1.215 +  for (y = 0; y < height; ++y) {
   1.216 +    MirrorRow(src_y, dst_y, width);
   1.217 +    src_y += src_stride_y;
   1.218 +    dst_y += dst_stride_y;
   1.219 +  }
   1.220 +}
   1.221 +
   1.222 +// Convert YUY2 to I422.
   1.223 +LIBYUV_API
   1.224 +int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
   1.225 +               uint8* dst_y, int dst_stride_y,
   1.226 +               uint8* dst_u, int dst_stride_u,
   1.227 +               uint8* dst_v, int dst_stride_v,
   1.228 +               int width, int height) {
   1.229 +  int y;
   1.230 +  void (*YUY2ToUV422Row)(const uint8* src_yuy2,
   1.231 +                         uint8* dst_u, uint8* dst_v, int pix) =
   1.232 +      YUY2ToUV422Row_C;
   1.233 +  void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
   1.234 +      YUY2ToYRow_C;
   1.235 +  // Negative height means invert the image.
   1.236 +  if (height < 0) {
   1.237 +    height = -height;
   1.238 +    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
   1.239 +    src_stride_yuy2 = -src_stride_yuy2;
   1.240 +  }
   1.241 +  // Coalesce rows.
   1.242 +  if (src_stride_yuy2 == width * 2 &&
   1.243 +      dst_stride_y == width &&
   1.244 +      dst_stride_u * 2 == width &&
   1.245 +      dst_stride_v * 2 == width) {
   1.246 +    width *= height;
   1.247 +    height = 1;
   1.248 +    src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
   1.249 +  }
   1.250 +#if defined(HAS_YUY2TOYROW_SSE2)
   1.251 +  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
   1.252 +    YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
   1.253 +    YUY2ToYRow = YUY2ToYRow_Any_SSE2;
   1.254 +    if (IS_ALIGNED(width, 16)) {
   1.255 +      YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
   1.256 +      YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
   1.257 +      if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
   1.258 +        YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
   1.259 +        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
   1.260 +          YUY2ToYRow = YUY2ToYRow_SSE2;
   1.261 +        }
   1.262 +      }
   1.263 +    }
   1.264 +  }
   1.265 +#endif
   1.266 +#if defined(HAS_YUY2TOYROW_AVX2)
   1.267 +  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
   1.268 +    YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
   1.269 +    YUY2ToYRow = YUY2ToYRow_Any_AVX2;
   1.270 +    if (IS_ALIGNED(width, 32)) {
   1.271 +      YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
   1.272 +      YUY2ToYRow = YUY2ToYRow_AVX2;
   1.273 +    }
   1.274 +  }
   1.275 +#endif
   1.276 +#if defined(HAS_YUY2TOYROW_NEON)
   1.277 +  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
   1.278 +    YUY2ToYRow = YUY2ToYRow_Any_NEON;
   1.279 +    if (width >= 16) {
   1.280 +      YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
   1.281 +    }
   1.282 +    if (IS_ALIGNED(width, 16)) {
   1.283 +      YUY2ToYRow = YUY2ToYRow_NEON;
   1.284 +      YUY2ToUV422Row = YUY2ToUV422Row_NEON;
   1.285 +    }
   1.286 +  }
   1.287 +#endif
   1.288 +
   1.289 +  for (y = 0; y < height; ++y) {
   1.290 +    YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
   1.291 +    YUY2ToYRow(src_yuy2, dst_y, width);
   1.292 +    src_yuy2 += src_stride_yuy2;
   1.293 +    dst_y += dst_stride_y;
   1.294 +    dst_u += dst_stride_u;
   1.295 +    dst_v += dst_stride_v;
   1.296 +  }
   1.297 +  return 0;
   1.298 +}
   1.299 +
   1.300 +// Convert UYVY to I422.
   1.301 +LIBYUV_API
   1.302 +int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
   1.303 +               uint8* dst_y, int dst_stride_y,
   1.304 +               uint8* dst_u, int dst_stride_u,
   1.305 +               uint8* dst_v, int dst_stride_v,
   1.306 +               int width, int height) {
   1.307 +  int y;
   1.308 +  void (*UYVYToUV422Row)(const uint8* src_uyvy,
   1.309 +                         uint8* dst_u, uint8* dst_v, int pix) =
   1.310 +      UYVYToUV422Row_C;
   1.311 +  void (*UYVYToYRow)(const uint8* src_uyvy,
   1.312 +                     uint8* dst_y, int pix) = UYVYToYRow_C;
   1.313 +  // Negative height means invert the image.
   1.314 +  if (height < 0) {
   1.315 +    height = -height;
   1.316 +    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
   1.317 +    src_stride_uyvy = -src_stride_uyvy;
   1.318 +  }
   1.319 +  // Coalesce rows.
   1.320 +  if (src_stride_uyvy == width * 2 &&
   1.321 +      dst_stride_y == width &&
   1.322 +      dst_stride_u * 2 == width &&
   1.323 +      dst_stride_v * 2 == width) {
   1.324 +    width *= height;
   1.325 +    height = 1;
   1.326 +    src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
   1.327 +  }
   1.328 +#if defined(HAS_UYVYTOYROW_SSE2)
   1.329 +  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
   1.330 +    UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
   1.331 +    UYVYToYRow = UYVYToYRow_Any_SSE2;
   1.332 +    if (IS_ALIGNED(width, 16)) {
   1.333 +      UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2;
   1.334 +      UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
   1.335 +      if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
   1.336 +        UYVYToUV422Row = UYVYToUV422Row_SSE2;
   1.337 +        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
   1.338 +          UYVYToYRow = UYVYToYRow_SSE2;
   1.339 +        }
   1.340 +      }
   1.341 +    }
   1.342 +  }
   1.343 +#endif
   1.344 +#if defined(HAS_UYVYTOYROW_AVX2)
   1.345 +  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
   1.346 +    UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
   1.347 +    UYVYToYRow = UYVYToYRow_Any_AVX2;
   1.348 +    if (IS_ALIGNED(width, 32)) {
   1.349 +      UYVYToUV422Row = UYVYToUV422Row_AVX2;
   1.350 +      UYVYToYRow = UYVYToYRow_AVX2;
   1.351 +    }
   1.352 +  }
   1.353 +#endif
   1.354 +#if defined(HAS_UYVYTOYROW_NEON)
   1.355 +  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
   1.356 +    UYVYToYRow = UYVYToYRow_Any_NEON;
   1.357 +    if (width >= 16) {
   1.358 +      UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
   1.359 +    }
   1.360 +    if (IS_ALIGNED(width, 16)) {
   1.361 +      UYVYToYRow = UYVYToYRow_NEON;
   1.362 +      UYVYToUV422Row = UYVYToUV422Row_NEON;
   1.363 +    }
   1.364 +  }
   1.365 +#endif
   1.366 +
   1.367 +  for (y = 0; y < height; ++y) {
   1.368 +    UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
   1.369 +    UYVYToYRow(src_uyvy, dst_y, width);
   1.370 +    src_uyvy += src_stride_uyvy;
   1.371 +    dst_y += dst_stride_y;
   1.372 +    dst_u += dst_stride_u;
   1.373 +    dst_v += dst_stride_v;
   1.374 +  }
   1.375 +  return 0;
   1.376 +}
   1.377 +
   1.378 +// Mirror I400 with optional flipping
   1.379 +LIBYUV_API
   1.380 +int I400Mirror(const uint8* src_y, int src_stride_y,
   1.381 +               uint8* dst_y, int dst_stride_y,
   1.382 +               int width, int height) {
   1.383 +  if (!src_y || !dst_y ||
   1.384 +      width <= 0 || height == 0) {
   1.385 +    return -1;
   1.386 +  }
   1.387 +  // Negative height means invert the image.
   1.388 +  if (height < 0) {
   1.389 +    height = -height;
   1.390 +    src_y = src_y + (height - 1) * src_stride_y;
   1.391 +    src_stride_y = -src_stride_y;
   1.392 +  }
   1.393 +
   1.394 +  MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
   1.395 +  return 0;
   1.396 +}
   1.397 +
   1.398 +// Mirror I420 with optional flipping
   1.399 +LIBYUV_API
   1.400 +int I420Mirror(const uint8* src_y, int src_stride_y,
   1.401 +               const uint8* src_u, int src_stride_u,
   1.402 +               const uint8* src_v, int src_stride_v,
   1.403 +               uint8* dst_y, int dst_stride_y,
   1.404 +               uint8* dst_u, int dst_stride_u,
   1.405 +               uint8* dst_v, int dst_stride_v,
   1.406 +               int width, int height) {
   1.407 +  int halfwidth = (width + 1) >> 1;
   1.408 +  int halfheight = (height + 1) >> 1;
   1.409 +  if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v ||
   1.410 +      width <= 0 || height == 0) {
   1.411 +    return -1;
   1.412 +  }
   1.413 +  // Negative height means invert the image.
   1.414 +  if (height < 0) {
   1.415 +    height = -height;
   1.416 +    halfheight = (height + 1) >> 1;
   1.417 +    src_y = src_y + (height - 1) * src_stride_y;
   1.418 +    src_u = src_u + (halfheight - 1) * src_stride_u;
   1.419 +    src_v = src_v + (halfheight - 1) * src_stride_v;
   1.420 +    src_stride_y = -src_stride_y;
   1.421 +    src_stride_u = -src_stride_u;
   1.422 +    src_stride_v = -src_stride_v;
   1.423 +  }
   1.424 +
   1.425 +  if (dst_y) {
   1.426 +    MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
   1.427 +  }
   1.428 +  MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
   1.429 +  MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
   1.430 +  return 0;
   1.431 +}
   1.432 +
   1.433 +// ARGB mirror.
   1.434 +LIBYUV_API
   1.435 +int ARGBMirror(const uint8* src_argb, int src_stride_argb,
   1.436 +               uint8* dst_argb, int dst_stride_argb,
   1.437 +               int width, int height) {
   1.438 +  int y;
   1.439 +  void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
   1.440 +      ARGBMirrorRow_C;
   1.441 +  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
   1.442 +    return -1;
   1.443 +  }
   1.444 +  // Negative height means invert the image.
   1.445 +  if (height < 0) {
   1.446 +    height = -height;
   1.447 +    src_argb = src_argb + (height - 1) * src_stride_argb;
   1.448 +    src_stride_argb = -src_stride_argb;
   1.449 +  }
   1.450 +
   1.451 +#if defined(HAS_ARGBMIRRORROW_SSSE3)
   1.452 +  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
   1.453 +      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
   1.454 +      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
   1.455 +    ARGBMirrorRow = ARGBMirrorRow_SSSE3;
   1.456 +  }
   1.457 +#endif
   1.458 +#if defined(HAS_ARGBMIRRORROW_AVX2)
   1.459 +  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
   1.460 +    ARGBMirrorRow = ARGBMirrorRow_AVX2;
   1.461 +  }
   1.462 +#endif
   1.463 +#if defined(HAS_ARGBMIRRORROW_NEON)
   1.464 +  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
   1.465 +    ARGBMirrorRow = ARGBMirrorRow_NEON;
   1.466 +  }
   1.467 +#endif
   1.468 +
   1.469 +  // Mirror plane
   1.470 +  for (y = 0; y < height; ++y) {
   1.471 +    ARGBMirrorRow(src_argb, dst_argb, width);
   1.472 +    src_argb += src_stride_argb;
   1.473 +    dst_argb += dst_stride_argb;
   1.474 +  }
   1.475 +  return 0;
   1.476 +}
   1.477 +
   1.478 +// Get a blender that optimized for the CPU, alignment and pixel count.
   1.479 +// As there are 6 blenders to choose from, the caller should try to use
   1.480 +// the same blend function for all pixels if possible.
   1.481 +LIBYUV_API
   1.482 +ARGBBlendRow GetARGBBlend() {
   1.483 +  void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
   1.484 +                       uint8* dst_argb, int width) = ARGBBlendRow_C;
   1.485 +#if defined(HAS_ARGBBLENDROW_SSSE3)
   1.486 +  if (TestCpuFlag(kCpuHasSSSE3)) {
   1.487 +    ARGBBlendRow = ARGBBlendRow_SSSE3;
   1.488 +    return ARGBBlendRow;
   1.489 +  }
   1.490 +#endif
   1.491 +#if defined(HAS_ARGBBLENDROW_SSE2)
   1.492 +  if (TestCpuFlag(kCpuHasSSE2)) {
   1.493 +    ARGBBlendRow = ARGBBlendRow_SSE2;
   1.494 +  }
   1.495 +#endif
   1.496 +#if defined(HAS_ARGBBLENDROW_NEON)
   1.497 +  if (TestCpuFlag(kCpuHasNEON)) {
   1.498 +    ARGBBlendRow = ARGBBlendRow_NEON;
   1.499 +  }
   1.500 +#endif
   1.501 +  return ARGBBlendRow;
   1.502 +}
   1.503 +
   1.504 +// Alpha Blend 2 ARGB images and store to destination.
   1.505 +LIBYUV_API
   1.506 +int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
   1.507 +              const uint8* src_argb1, int src_stride_argb1,
   1.508 +              uint8* dst_argb, int dst_stride_argb,
   1.509 +              int width, int height) {
   1.510 +  int y;
   1.511 +  void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
   1.512 +                       uint8* dst_argb, int width) = GetARGBBlend();
   1.513 +  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
   1.514 +    return -1;
   1.515 +  }
   1.516 +  // Negative height means invert the image.
   1.517 +  if (height < 0) {
   1.518 +    height = -height;
   1.519 +    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
   1.520 +    dst_stride_argb = -dst_stride_argb;
   1.521 +  }
   1.522 +  // Coalesce rows.
   1.523 +  if (src_stride_argb0 == width * 4 &&
   1.524 +      src_stride_argb1 == width * 4 &&
   1.525 +      dst_stride_argb == width * 4) {
   1.526 +    width *= height;
   1.527 +    height = 1;
   1.528 +    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
   1.529 +  }
   1.530 +
   1.531 +  for (y = 0; y < height; ++y) {
   1.532 +    ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
   1.533 +    src_argb0 += src_stride_argb0;
   1.534 +    src_argb1 += src_stride_argb1;
   1.535 +    dst_argb += dst_stride_argb;
   1.536 +  }
   1.537 +  return 0;
   1.538 +}
   1.539 +
   1.540 +// Multiply 2 ARGB images and store to destination.
   1.541 +LIBYUV_API
   1.542 +int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
   1.543 +                 const uint8* src_argb1, int src_stride_argb1,
   1.544 +                 uint8* dst_argb, int dst_stride_argb,
   1.545 +                 int width, int height) {
   1.546 +  int y;
   1.547 +  void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
   1.548 +                          int width) = ARGBMultiplyRow_C;
   1.549 +  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
   1.550 +    return -1;
   1.551 +  }
   1.552 +  // Negative height means invert the image.
   1.553 +  if (height < 0) {
   1.554 +    height = -height;
   1.555 +    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
   1.556 +    dst_stride_argb = -dst_stride_argb;
   1.557 +  }
   1.558 +  // Coalesce rows.
   1.559 +  if (src_stride_argb0 == width * 4 &&
   1.560 +      src_stride_argb1 == width * 4 &&
   1.561 +      dst_stride_argb == width * 4) {
   1.562 +    width *= height;
   1.563 +    height = 1;
   1.564 +    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
   1.565 +  }
   1.566 +#if defined(HAS_ARGBMULTIPLYROW_SSE2)
   1.567 +  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
   1.568 +    ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
   1.569 +    if (IS_ALIGNED(width, 4)) {
   1.570 +      ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
   1.571 +    }
   1.572 +  }
   1.573 +#endif
   1.574 +#if defined(HAS_ARGBMULTIPLYROW_AVX2)
   1.575 +  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
   1.576 +    ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
   1.577 +    if (IS_ALIGNED(width, 8)) {
   1.578 +      ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
   1.579 +    }
   1.580 +  }
   1.581 +#endif
   1.582 +#if defined(HAS_ARGBMULTIPLYROW_NEON)
   1.583 +  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
   1.584 +    ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
   1.585 +    if (IS_ALIGNED(width, 8)) {
   1.586 +      ARGBMultiplyRow = ARGBMultiplyRow_NEON;
   1.587 +    }
   1.588 +  }
   1.589 +#endif
   1.590 +
   1.591 +  // Multiply plane
   1.592 +  for (y = 0; y < height; ++y) {
   1.593 +    ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
   1.594 +    src_argb0 += src_stride_argb0;
   1.595 +    src_argb1 += src_stride_argb1;
   1.596 +    dst_argb += dst_stride_argb;
   1.597 +  }
   1.598 +  return 0;
   1.599 +}
   1.600 +
   1.601 +// Add 2 ARGB images and store to destination.
   1.602 +LIBYUV_API
   1.603 +int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
   1.604 +            const uint8* src_argb1, int src_stride_argb1,
   1.605 +            uint8* dst_argb, int dst_stride_argb,
   1.606 +            int width, int height) {
   1.607 +  int y;
   1.608 +  void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
   1.609 +                     int width) = ARGBAddRow_C;
   1.610 +  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
   1.611 +    return -1;
   1.612 +  }
   1.613 +  // Negative height means invert the image.
   1.614 +  if (height < 0) {
   1.615 +    height = -height;
   1.616 +    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
   1.617 +    dst_stride_argb = -dst_stride_argb;
   1.618 +  }
   1.619 +  // Coalesce rows.
   1.620 +  if (src_stride_argb0 == width * 4 &&
   1.621 +      src_stride_argb1 == width * 4 &&
   1.622 +      dst_stride_argb == width * 4) {
   1.623 +    width *= height;
   1.624 +    height = 1;
   1.625 +    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
   1.626 +  }
   1.627 +#if defined(HAS_ARGBADDROW_SSE2) && defined(_MSC_VER)
   1.628 +  if (TestCpuFlag(kCpuHasSSE2)) {
   1.629 +    ARGBAddRow = ARGBAddRow_SSE2;
   1.630 +  }
   1.631 +#endif
   1.632 +#if defined(HAS_ARGBADDROW_SSE2) && !defined(_MSC_VER)
   1.633 +  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
   1.634 +    ARGBAddRow = ARGBAddRow_Any_SSE2;
   1.635 +    if (IS_ALIGNED(width, 4)) {
   1.636 +      ARGBAddRow = ARGBAddRow_SSE2;
   1.637 +    }
   1.638 +  }
   1.639 +#endif
   1.640 +#if defined(HAS_ARGBADDROW_AVX2)
   1.641 +  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
   1.642 +    ARGBAddRow = ARGBAddRow_Any_AVX2;
   1.643 +    if (IS_ALIGNED(width, 8)) {
   1.644 +      ARGBAddRow = ARGBAddRow_AVX2;
   1.645 +    }
   1.646 +  }
   1.647 +#endif
   1.648 +#if defined(HAS_ARGBADDROW_NEON)
   1.649 +  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
   1.650 +    ARGBAddRow = ARGBAddRow_Any_NEON;
   1.651 +    if (IS_ALIGNED(width, 8)) {
   1.652 +      ARGBAddRow = ARGBAddRow_NEON;
   1.653 +    }
   1.654 +  }
   1.655 +#endif
   1.656 +
   1.657 +  // Add plane
   1.658 +  for (y = 0; y < height; ++y) {
   1.659 +    ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
   1.660 +    src_argb0 += src_stride_argb0;
   1.661 +    src_argb1 += src_stride_argb1;
   1.662 +    dst_argb += dst_stride_argb;
   1.663 +  }
   1.664 +  return 0;
   1.665 +}
   1.666 +
   1.667 +// Subtract 2 ARGB images and store to destination.
   1.668 +LIBYUV_API
   1.669 +int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
   1.670 +                 const uint8* src_argb1, int src_stride_argb1,
   1.671 +                 uint8* dst_argb, int dst_stride_argb,
   1.672 +                 int width, int height) {
   1.673 +  int y;
   1.674 +  void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst,
   1.675 +                          int width) = ARGBSubtractRow_C;
   1.676 +  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
   1.677 +    return -1;
   1.678 +  }
   1.679 +  // Negative height means invert the image.
   1.680 +  if (height < 0) {
   1.681 +    height = -height;
   1.682 +    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
   1.683 +    dst_stride_argb = -dst_stride_argb;
   1.684 +  }
   1.685 +  // Coalesce rows.
   1.686 +  if (src_stride_argb0 == width * 4 &&
   1.687 +      src_stride_argb1 == width * 4 &&
   1.688 +      dst_stride_argb == width * 4) {
   1.689 +    width *= height;
   1.690 +    height = 1;
   1.691 +    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
   1.692 +  }
   1.693 +#if defined(HAS_ARGBSUBTRACTROW_SSE2)
   1.694 +  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
   1.695 +    ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
   1.696 +    if (IS_ALIGNED(width, 4)) {
   1.697 +      ARGBSubtractRow = ARGBSubtractRow_SSE2;
   1.698 +    }
   1.699 +  }
   1.700 +#endif
   1.701 +#if defined(HAS_ARGBSUBTRACTROW_AVX2)
   1.702 +  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
   1.703 +    ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
   1.704 +    if (IS_ALIGNED(width, 8)) {
   1.705 +      ARGBSubtractRow = ARGBSubtractRow_AVX2;
   1.706 +    }
   1.707 +  }
   1.708 +#endif
   1.709 +#if defined(HAS_ARGBSUBTRACTROW_NEON)
   1.710 +  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
   1.711 +    ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
   1.712 +    if (IS_ALIGNED(width, 8)) {
   1.713 +      ARGBSubtractRow = ARGBSubtractRow_NEON;
   1.714 +    }
   1.715 +  }
   1.716 +#endif
   1.717 +
   1.718 +  // Subtract plane
   1.719 +  for (y = 0; y < height; ++y) {
   1.720 +    ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
   1.721 +    src_argb0 += src_stride_argb0;
   1.722 +    src_argb1 += src_stride_argb1;
   1.723 +    dst_argb += dst_stride_argb;
   1.724 +  }
   1.725 +  return 0;
   1.726 +}
   1.727 +
   1.728 +// Convert I422 to BGRA.
   1.729 +LIBYUV_API
   1.730 +int I422ToBGRA(const uint8* src_y, int src_stride_y,
   1.731 +               const uint8* src_u, int src_stride_u,
   1.732 +               const uint8* src_v, int src_stride_v,
   1.733 +               uint8* dst_bgra, int dst_stride_bgra,
   1.734 +               int width, int height) {
   1.735 +  int y;
   1.736 +  void (*I422ToBGRARow)(const uint8* y_buf,
   1.737 +                        const uint8* u_buf,
   1.738 +                        const uint8* v_buf,
   1.739 +                        uint8* rgb_buf,
   1.740 +                        int width) = I422ToBGRARow_C;
   1.741 +  if (!src_y || !src_u || !src_v ||
   1.742 +      !dst_bgra ||
   1.743 +      width <= 0 || height == 0) {
   1.744 +    return -1;
   1.745 +  }
   1.746 +  // Negative height means invert the image.
   1.747 +  if (height < 0) {
   1.748 +    height = -height;
   1.749 +    dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
   1.750 +    dst_stride_bgra = -dst_stride_bgra;
   1.751 +  }
   1.752 +  // Coalesce rows.
   1.753 +  if (src_stride_y == width &&
   1.754 +      src_stride_u * 2 == width &&
   1.755 +      src_stride_v * 2 == width &&
   1.756 +      dst_stride_bgra == width * 4) {
   1.757 +    width *= height;
   1.758 +    height = 1;
   1.759 +    src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0;
   1.760 +  }
   1.761 +#if defined(HAS_I422TOBGRAROW_NEON)
   1.762 +  if (TestCpuFlag(kCpuHasNEON)) {
   1.763 +    I422ToBGRARow = I422ToBGRARow_Any_NEON;
   1.764 +    if (IS_ALIGNED(width, 16)) {
   1.765 +      I422ToBGRARow = I422ToBGRARow_NEON;
   1.766 +    }
   1.767 +  }
   1.768 +#elif defined(HAS_I422TOBGRAROW_SSSE3)
   1.769 +  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
   1.770 +    I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
   1.771 +    if (IS_ALIGNED(width, 8)) {
   1.772 +      I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
   1.773 +      if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
   1.774 +        I422ToBGRARow = I422ToBGRARow_SSSE3;
   1.775 +      }
   1.776 +    }
   1.777 +  }
   1.778 +#elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
   1.779 +  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
   1.780 +      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
   1.781 +      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
   1.782 +      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
   1.783 +      IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
   1.784 +    I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
   1.785 +  }
   1.786 +#endif
   1.787 +
   1.788 +  for (y = 0; y < height; ++y) {
   1.789 +    I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
   1.790 +    dst_bgra += dst_stride_bgra;
   1.791 +    src_y += src_stride_y;
   1.792 +    src_u += src_stride_u;
   1.793 +    src_v += src_stride_v;
   1.794 +  }
   1.795 +  return 0;
   1.796 +}
   1.797 +
// Convert I422 to ABGR.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int I422ToABGR(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_abgr, int dst_stride_abgr,
               int width, int height) {
  int y;
  // Row converter: defaults to the portable C version and is upgraded
  // below to a SIMD variant based on runtime CPU detection.
  void (*I422ToABGRRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I422ToABGRRow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_abgr ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
    dst_stride_abgr = -dst_stride_abgr;
  }
  // Coalesce rows: when every plane is contiguous (stride equals row
  // width), process the whole image as one long row.
  if (src_stride_y == width &&
      src_stride_u * 2 == width &&
      src_stride_v * 2 == width &&
      dst_stride_abgr == width * 4) {
    width *= height;
    height = 1;
    src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
  }
#if defined(HAS_I422TOABGRROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToABGRRow = I422ToABGRRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      I422ToABGRRow = I422ToABGRRow_NEON;
    }
  }
#elif defined(HAS_I422TOABGRROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
        I422ToABGRRow = I422ToABGRRow_SSSE3;
      }
    }
  }
#endif

  // I422 chroma planes have half width but full height, so every source
  // plane advances one row per output row.
  for (y = 0; y < height; ++y) {
    I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
    dst_abgr += dst_stride_abgr;
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}
   1.859 +
// Convert I422 to RGBA.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int I422ToRGBA(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_rgba, int dst_stride_rgba,
               int width, int height) {
  int y;
  // Row converter: defaults to the portable C version and is upgraded
  // below to a SIMD variant based on runtime CPU detection.
  void (*I422ToRGBARow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I422ToRGBARow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_rgba ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
    dst_stride_rgba = -dst_stride_rgba;
  }
  // Coalesce rows: when every plane is contiguous (stride equals row
  // width), process the whole image as one long row.
  if (src_stride_y == width &&
      src_stride_u * 2 == width &&
      src_stride_v * 2 == width &&
      dst_stride_rgba == width * 4) {
    width *= height;
    height = 1;
    src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0;
  }
#if defined(HAS_I422TORGBAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToRGBARow = I422ToRGBARow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      I422ToRGBARow = I422ToRGBARow_NEON;
    }
  }
#elif defined(HAS_I422TORGBAROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
        I422ToRGBARow = I422ToRGBARow_SSSE3;
      }
    }
  }
#endif

  // I422 chroma planes have half width but full height, so every source
  // plane advances one row per output row.
  for (y = 0; y < height; ++y) {
    I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
    dst_rgba += dst_stride_rgba;
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}
   1.921 +
// Convert NV12 (Y plane + interleaved UV plane) to RGB565.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int NV12ToRGB565(const uint8* src_y, int src_stride_y,
                 const uint8* src_uv, int src_stride_uv,
                 uint8* dst_rgb565, int dst_stride_rgb565,
                 int width, int height) {
  int y;
  // Row converter: portable C by default, upgraded to SIMD below.
  void (*NV12ToRGB565Row)(const uint8* y_buf,
                          const uint8* uv_buf,
                          uint8* rgb_buf,
                          int width) = NV12ToRGB565Row_C;
  if (!src_y || !src_uv || !dst_rgb565 ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
    dst_stride_rgb565 = -dst_stride_rgb565;
  }
#if defined(HAS_NV12TORGB565ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
    }
  }
#elif defined(HAS_NV12TORGB565ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      NV12ToRGB565Row = NV12ToRGB565Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width);
    dst_rgb565 += dst_stride_rgb565;
    src_y += src_stride_y;
    // Chroma is subsampled vertically: the interleaved UV plane advances
    // only every second output row.
    if (y & 1) {
      src_uv += src_stride_uv;
    }
  }
  return 0;
}
   1.969 +
// Convert NV21 (Y plane + interleaved VU plane) to RGB565.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int NV21ToRGB565(const uint8* src_y, int src_stride_y,
                 const uint8* src_vu, int src_stride_vu,
                 uint8* dst_rgb565, int dst_stride_rgb565,
                 int width, int height) {
  int y;
  // Row converter: portable C by default, upgraded to SIMD below.
  void (*NV21ToRGB565Row)(const uint8* y_buf,
                          const uint8* src_vu,
                          uint8* rgb_buf,
                          int width) = NV21ToRGB565Row_C;
  if (!src_y || !src_vu || !dst_rgb565 ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
    dst_stride_rgb565 = -dst_stride_rgb565;
  }
#if defined(HAS_NV21TORGB565ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      NV21ToRGB565Row = NV21ToRGB565Row_SSSE3;
    }
  }
#elif defined(HAS_NV21TORGB565ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      NV21ToRGB565Row = NV21ToRGB565Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width);
    dst_rgb565 += dst_stride_rgb565;
    src_y += src_stride_y;
    // Chroma is subsampled vertically: the interleaved VU plane advances
    // only every second output row.
    if (y & 1) {
      src_vu += src_stride_vu;
    }
  }
  return 0;
}
  1.1017 +
  1.1018 +LIBYUV_API
  1.1019 +void SetPlane(uint8* dst_y, int dst_stride_y,
  1.1020 +              int width, int height,
  1.1021 +              uint32 value) {
  1.1022 +  int y;
  1.1023 +  uint32 v32 = value | (value << 8) | (value << 16) | (value << 24);
  1.1024 +  void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C;
  1.1025 +  // Coalesce rows.
  1.1026 +  if (dst_stride_y == width) {
  1.1027 +    width *= height;
  1.1028 +    height = 1;
  1.1029 +    dst_stride_y = 0;
  1.1030 +  }
  1.1031 +#if defined(HAS_SETROW_NEON)
  1.1032 +  if (TestCpuFlag(kCpuHasNEON) &&
  1.1033 +      IS_ALIGNED(width, 16) &&
  1.1034 +      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
  1.1035 +    SetRow = SetRow_NEON;
  1.1036 +  }
  1.1037 +#endif
  1.1038 +#if defined(HAS_SETROW_X86)
  1.1039 +  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
  1.1040 +    SetRow = SetRow_X86;
  1.1041 +  }
  1.1042 +#endif
  1.1043 +
  1.1044 +  // Set plane
  1.1045 +  for (y = 0; y < height; ++y) {
  1.1046 +    SetRow(dst_y, v32, width);
  1.1047 +    dst_y += dst_stride_y;
  1.1048 +  }
  1.1049 +}
  1.1050 +
  1.1051 +// Draw a rectangle into I420
  1.1052 +LIBYUV_API
  1.1053 +int I420Rect(uint8* dst_y, int dst_stride_y,
  1.1054 +             uint8* dst_u, int dst_stride_u,
  1.1055 +             uint8* dst_v, int dst_stride_v,
  1.1056 +             int x, int y,
  1.1057 +             int width, int height,
  1.1058 +             int value_y, int value_u, int value_v) {
  1.1059 +  int halfwidth = (width + 1) >> 1;
  1.1060 +  int halfheight = (height + 1) >> 1;
  1.1061 +  uint8* start_y = dst_y + y * dst_stride_y + x;
  1.1062 +  uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
  1.1063 +  uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
  1.1064 +  if (!dst_y || !dst_u || !dst_v ||
  1.1065 +      width <= 0 || height <= 0 ||
  1.1066 +      x < 0 || y < 0 ||
  1.1067 +      value_y < 0 || value_y > 255 ||
  1.1068 +      value_u < 0 || value_u > 255 ||
  1.1069 +      value_v < 0 || value_v > 255) {
  1.1070 +    return -1;
  1.1071 +  }
  1.1072 +
  1.1073 +  SetPlane(start_y, dst_stride_y, width, height, value_y);
  1.1074 +  SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
  1.1075 +  SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
  1.1076 +  return 0;
  1.1077 +}
  1.1078 +
// Draw a filled rectangle into an ARGB image.
// value is the packed 32 bit ARGB fill color.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBRect(uint8* dst_argb, int dst_stride_argb,
             int dst_x, int dst_y,
             int width, int height,
             uint32 value) {
  if (!dst_argb ||
      width <= 0 || height <= 0 ||
      dst_x < 0 || dst_y < 0) {
    return -1;
  }
  // Advance to the rectangle's top-left corner (4 bytes per pixel).
  dst_argb += dst_y * dst_stride_argb + dst_x * 4;
  // Coalesce rows.
  if (dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    dst_stride_argb = 0;
  }
#if defined(HAS_SETROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBSetRows_NEON(dst_argb, value, width, dst_stride_argb, height);
    return 0;
  }
#endif
#if defined(HAS_SETROW_X86)
  if (TestCpuFlag(kCpuHasX86)) {
    ARGBSetRows_X86(dst_argb, value, width, dst_stride_argb, height);
    return 0;
  }
#endif
  ARGBSetRows_C(dst_argb, value, width, dst_stride_argb, height);
  return 0;
}
  1.1113 +
// Convert unattenuated ARGB to preattenuated ARGB.
// An unattenuated ARGB alpha blend uses the formula
// p = a * f + (1 - a) * b
// where
//   p is output pixel
//   f is foreground pixel
//   b is background pixel
//   a is alpha value from foreground pixel
// A preattenuated ARGB alpha blend uses the formula
// p = f + (1 - a) * b
// where
//   f is foreground pixel premultiplied by alpha
  1.1126 +
// Premultiply each ARGB pixel's color channels by its alpha.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
                  uint8* dst_argb, int dst_stride_argb,
                  int width, int height) {
  int y;
  // Row function: portable C by default, upgraded to SIMD below.
  void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
                           int width) = ARGBAttenuateRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows.
  if (src_stride_argb == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
  // Later #if blocks can override earlier selections, so the fastest
  // available variant (AVX2/NEON) wins when multiple are compiled in.
#if defined(HAS_ARGBATTENUATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBAttenuateRow = ARGBAttenuateRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 4)) {
      ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBAttenuateRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
  1.1191 +
// Convert preattenuated ARGB to unattenuated ARGB (divide color
// channels by alpha). Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
                    uint8* dst_argb, int dst_stride_argb,
                    int width, int height) {
  int y;
  // Row function: portable C by default, upgraded to SIMD below.
  void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
                             int width) = ARGBUnattenuateRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows.
  if (src_stride_argb == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBUNATTENUATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
    ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBUNATTENUATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
    ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
    }
  }
#endif
// TODO(fbarchard): Neon version.

  for (y = 0; y < height; ++y) {
    ARGBUnattenuateRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
  1.1240 +
// Convert ARGB to grayscale ARGB (alpha preserved), writing to a
// separate destination. Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  int y;
  // Row function: portable C by default, upgraded to SIMD below.
  void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
                      int width) = ARGBGrayRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows.
  if (src_stride_argb == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBGRAYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBGrayRow = ARGBGrayRow_SSSE3;
  }
#elif defined(HAS_ARGBGRAYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_NEON;
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBGrayRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
  1.1283 +
  1.1284 +// Make a rectangle of ARGB gray scale.
  1.1285 +LIBYUV_API
  1.1286 +int ARGBGray(uint8* dst_argb, int dst_stride_argb,
  1.1287 +             int dst_x, int dst_y,
  1.1288 +             int width, int height) {
  1.1289 +  int y;
  1.1290 +  void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
  1.1291 +                      int width) = ARGBGrayRow_C;
  1.1292 +  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  1.1293 +  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
  1.1294 +    return -1;
  1.1295 +  }
  1.1296 +  // Coalesce rows.
  1.1297 +  if (dst_stride_argb == width * 4) {
  1.1298 +    width *= height;
  1.1299 +    height = 1;
  1.1300 +    dst_stride_argb = 0;
  1.1301 +  }
  1.1302 +#if defined(HAS_ARGBGRAYROW_SSSE3)
  1.1303 +  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
  1.1304 +      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
  1.1305 +    ARGBGrayRow = ARGBGrayRow_SSSE3;
  1.1306 +  }
  1.1307 +#elif defined(HAS_ARGBGRAYROW_NEON)
  1.1308 +  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
  1.1309 +    ARGBGrayRow = ARGBGrayRow_NEON;
  1.1310 +  }
  1.1311 +#endif
  1.1312 +  for (y = 0; y < height; ++y) {
  1.1313 +    ARGBGrayRow(dst, dst, width);
  1.1314 +    dst += dst_stride_argb;
  1.1315 +  }
  1.1316 +  return 0;
  1.1317 +}
  1.1318 +
  1.1319 +// Make a rectangle of ARGB Sepia tone.
  1.1320 +LIBYUV_API
  1.1321 +int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
  1.1322 +              int dst_x, int dst_y, int width, int height) {
  1.1323 +  int y;
  1.1324 +  void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C;
  1.1325 +  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  1.1326 +  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
  1.1327 +    return -1;
  1.1328 +  }
  1.1329 +  // Coalesce rows.
  1.1330 +  if (dst_stride_argb == width * 4) {
  1.1331 +    width *= height;
  1.1332 +    height = 1;
  1.1333 +    dst_stride_argb = 0;
  1.1334 +  }
  1.1335 +#if defined(HAS_ARGBSEPIAROW_SSSE3)
  1.1336 +  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
  1.1337 +      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
  1.1338 +    ARGBSepiaRow = ARGBSepiaRow_SSSE3;
  1.1339 +  }
  1.1340 +#elif defined(HAS_ARGBSEPIAROW_NEON)
  1.1341 +  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
  1.1342 +    ARGBSepiaRow = ARGBSepiaRow_NEON;
  1.1343 +  }
  1.1344 +#endif
  1.1345 +  for (y = 0; y < height; ++y) {
  1.1346 +    ARGBSepiaRow(dst, width);
  1.1347 +    dst += dst_stride_argb;
  1.1348 +  }
  1.1349 +  return 0;
  1.1350 +}
  1.1351 +
// Apply a 4x4 matrix to each ARGB pixel.
// Note: Normally for shading, but can be used to swizzle or invert.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
                    uint8* dst_argb, int dst_stride_argb,
                    const int8* matrix_argb,
                    int width, int height) {
  int y;
  // Row function: portable C by default, upgraded to SIMD below.
  void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb,
      const int8* matrix_argb, int width) = ARGBColorMatrixRow_C;
  if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows.
  if (src_stride_argb == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
  // NOTE(review): the SSSE3 gate checks only dst alignment, not src —
  // presumably the row function tolerates unaligned loads; confirm.
#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
  }
#elif defined(HAS_ARGBCOLORMATRIXROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
  }
#endif
  for (y = 0; y < height; ++y) {
    ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
  1.1394 +
  1.1395 +// Apply a 4x3 matrix to each ARGB pixel.
  1.1396 +// Deprecated.
  1.1397 +LIBYUV_API
  1.1398 +int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
  1.1399 +                   const int8* matrix_rgb,
  1.1400 +                   int dst_x, int dst_y, int width, int height) {
  1.1401 +  SIMD_ALIGNED(int8 matrix_argb[16]);
  1.1402 +  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  1.1403 +  if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 ||
  1.1404 +      dst_x < 0 || dst_y < 0) {
  1.1405 +    return -1;
  1.1406 +  }
  1.1407 +
  1.1408 +  // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
  1.1409 +  matrix_argb[0] = matrix_rgb[0] / 2;
  1.1410 +  matrix_argb[1] = matrix_rgb[1] / 2;
  1.1411 +  matrix_argb[2] = matrix_rgb[2] / 2;
  1.1412 +  matrix_argb[3] = matrix_rgb[3] / 2;
  1.1413 +  matrix_argb[4] = matrix_rgb[4] / 2;
  1.1414 +  matrix_argb[5] = matrix_rgb[5] / 2;
  1.1415 +  matrix_argb[6] = matrix_rgb[6] / 2;
  1.1416 +  matrix_argb[7] = matrix_rgb[7] / 2;
  1.1417 +  matrix_argb[8] = matrix_rgb[8] / 2;
  1.1418 +  matrix_argb[9] = matrix_rgb[9] / 2;
  1.1419 +  matrix_argb[10] = matrix_rgb[10] / 2;
  1.1420 +  matrix_argb[11] = matrix_rgb[11] / 2;
  1.1421 +  matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
  1.1422 +  matrix_argb[15] = 64;  // 1.0
  1.1423 +
  1.1424 +  return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb,
  1.1425 +                         dst, dst_stride_argb,
  1.1426 +                         &matrix_argb[0], width, height);
  1.1427 +}
  1.1428 +
// Apply a color table to each ARGB pixel (all 4 channels remapped).
// Table contains 256 ARGB values.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
                   const uint8* table_argb,
                   int dst_x, int dst_y, int width, int height) {
  int y;
  void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
                            int width) = ARGBColorTableRow_C;
  // NOTE(review): dst is computed before the argument checks below;
  // with a NULL dst_argb or negative dst_x/dst_y this pointer
  // arithmetic is undefined behavior even though dst is never used.
  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
      dst_x < 0 || dst_y < 0) {
    return -1;
  }
  // Coalesce rows.
  if (dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    dst_stride_argb = 0;
  }
#if defined(HAS_ARGBCOLORTABLEROW_X86)
  if (TestCpuFlag(kCpuHasX86)) {
    ARGBColorTableRow = ARGBColorTableRow_X86;
  }
#endif
  // Remap pixels in place, one row at a time.
  for (y = 0; y < height; ++y) {
    ARGBColorTableRow(dst, table_argb, width);
    dst += dst_stride_argb;
  }
  return 0;
}
  1.1460 +
// Apply a color table to each ARGB pixel but preserve destination alpha.
// Table contains 256 ARGB values.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
                  const uint8* table_argb,
                  int dst_x, int dst_y, int width, int height) {
  int y;
  void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
                           int width) = RGBColorTableRow_C;
  // NOTE(review): dst is computed before the argument checks below;
  // with a NULL dst_argb or negative dst_x/dst_y this pointer
  // arithmetic is undefined behavior even though dst is never used.
  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
      dst_x < 0 || dst_y < 0) {
    return -1;
  }
  // Coalesce rows.
  if (dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    dst_stride_argb = 0;
  }
#if defined(HAS_RGBCOLORTABLEROW_X86)
  if (TestCpuFlag(kCpuHasX86)) {
    RGBColorTableRow = RGBColorTableRow_X86;
  }
#endif
  // Remap pixels in place, one row at a time.
  for (y = 0; y < height; ++y) {
    RGBColorTableRow(dst, table_argb, width);
    dst += dst_stride_argb;
  }
  return 0;
}
  1.1492 +
// ARGBQuantize is used to posterize art.
// e.g. rgb / qvalue * qvalue + qvalue / 2
// But the low levels implement efficiently with 3 parameters, and could be
// used for other high level operations.
// dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
// where scale is 1 / interval_size as a fixed point value.
// The divide is replaced with a multiply by a reciprocal fixed point value.
// Caveat - although SSE2 saturates, the C function does not and should be used
// with care if doing anything but quantization.
LIBYUV_API
int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
                 int scale, int interval_size, int interval_offset,
                 int dst_x, int dst_y, int width, int height) {
  int y;
  void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size,
                          int interval_offset, int width) = ARGBQuantizeRow_C;
  // NOTE(review): dst is computed before the argument checks below;
  // with a NULL dst_argb or negative dst_x/dst_y this pointer
  // arithmetic is undefined behavior even though dst is never used.
  uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
  if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
      interval_size < 1 || interval_size > 255) {
    return -1;
  }
  // Coalesce rows.
  if (dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    dst_stride_argb = 0;
  }
#if defined(HAS_ARGBQUANTIZEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
  }
#elif defined(HAS_ARGBQUANTIZEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBQuantizeRow = ARGBQuantizeRow_NEON;
  }
#endif
  // Quantize pixels in place, one row at a time.
  for (y = 0; y < height; ++y) {
    ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
    dst += dst_stride_argb;
  }
  return 0;
}
  1.1536 +
// Computes table of cumulative sum for image where the value is the sum
// of all values above and to the left of the entry. Used by ARGBBlur.
// dst_stride32_cumsum is in int32 units, not bytes.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
                             int32* dst_cumsum, int dst_stride32_cumsum,
                             int width, int height) {
  int y;
  void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
      const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
  // For the first row, "previous" points at the destination row itself,
  // which is zeroed by the memset below.
  int32* previous_cumsum = dst_cumsum;
  if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
    return -1;
  }
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
  }
#endif
  memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4);  // 4 int per pixel.
  for (y = 0; y < height; ++y) {
    ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
    previous_cumsum = dst_cumsum;
    dst_cumsum += dst_stride32_cumsum;
    src_argb += src_stride_argb;
  }
  return 0;
}
  1.1564 +
// Blur ARGB image.
// Caller should allocate CumulativeSum table of width * height * 16 bytes
// aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
// as the buffer is treated as circular.
LIBYUV_API
int ARGBBlur(const uint8* src_argb, int src_stride_argb,
             uint8* dst_argb, int dst_stride_argb,
             int32* dst_cumsum, int dst_stride32_cumsum,
             int width, int height, int radius) {
  int y;
  // Row helpers: one extends the cumulative-sum table by a row, the other
  // converts a pair of cumulative-sum rows into averaged (blurred) pixels.
  void (*ComputeCumulativeSumRow)(const uint8 *row, int32 *cumsum,
      const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
  void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
      int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C;
  int32* cumsum_bot_row;      // Newest (bottom-of-box) row in circular buffer.
  int32* max_cumsum_bot_row;  // One past the end of the circular buffer.
  int32* cumsum_top_row;      // Oldest row still needed (top of the box).

  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Clamp the radius so the averaging box never exceeds the image bounds.
  if (radius > height) {
    radius = height;
  }
  if (radius > (width / 2 - 1)) {
    radius = width / 2 - 1;
  }
  if (radius <= 0) {
    return -1;
  }
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
    CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
  }
#endif
  // Compute enough CumulativeSum for first row to be blurred. After this
  // one row of CumulativeSum is updated at a time.
  ARGBComputeCumulativeSum(src_argb, src_stride_argb,
                           dst_cumsum, dst_stride32_cumsum,
                           width, radius);

  src_argb = src_argb + radius * src_stride_argb;
  cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];

  // radius * 2 + 2 rows fit in the circular table (see header comment).
  max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
  cumsum_top_row = &dst_cumsum[0];

  for (y = 0; y < height; ++y) {
    // Rows bounding the averaging box, clamped to the image edges.
    int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
    int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
    int area = radius * (bot_y - top_y);  // Pixel count of left-clipped box.
    int boxwidth = radius * 4;            // Box width in ints (4 per pixel).
    int x;
    int n;

    // Increment cumsum_top_row pointer with circular buffer wrap around.
    if (top_y) {
      cumsum_top_row += dst_stride32_cumsum;
      if (cumsum_top_row >= max_cumsum_bot_row) {
        cumsum_top_row = dst_cumsum;
      }
    }
    // Increment cumsum_bot_row pointer with circular buffer wrap around and
    // then fill in a row of CumulativeSum.
    if ((y + radius) < height) {
      const int32* prev_cumsum_bot_row = cumsum_bot_row;
      cumsum_bot_row += dst_stride32_cumsum;
      if (cumsum_bot_row >= max_cumsum_bot_row) {
        cumsum_bot_row = dst_cumsum;
      }
      ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
                              width);
      src_argb += src_stride_argb;
    }

    // Left clipped.  Box grows by one column per pixel until full width.
    for (x = 0; x < radius + 1; ++x) {
      CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
                                boxwidth, area, &dst_argb[x * 4], 1);
      area += (bot_y - top_y);
      boxwidth += 4;
    }

    // Middle unclipped.  Full-width box; handle the whole run in one call.
    n = (width - 1) - radius - x + 1;
    CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
                              boxwidth, area, &dst_argb[x * 4], n);

    // Right clipped.  Box shrinks by one column per pixel to the edge.
    for (x += n; x <= width - 1; ++x) {
      area -= (bot_y - top_y);
      boxwidth -= 4;
      CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
                                cumsum_bot_row + (x - radius - 1) * 4,
                                boxwidth, area, &dst_argb[x * 4], 1);
    }
    dst_argb += dst_stride_argb;
  }
  return 0;
}
  1.1671 +
  1.1672 +// Multiply ARGB image by a specified ARGB value.
  1.1673 +LIBYUV_API
  1.1674 +int ARGBShade(const uint8* src_argb, int src_stride_argb,
  1.1675 +              uint8* dst_argb, int dst_stride_argb,
  1.1676 +              int width, int height, uint32 value) {
  1.1677 +  int y;
  1.1678 +  void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
  1.1679 +                       int width, uint32 value) = ARGBShadeRow_C;
  1.1680 +  if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
  1.1681 +    return -1;
  1.1682 +  }
  1.1683 +  if (height < 0) {
  1.1684 +    height = -height;
  1.1685 +    src_argb = src_argb + (height - 1) * src_stride_argb;
  1.1686 +    src_stride_argb = -src_stride_argb;
  1.1687 +  }
  1.1688 +  // Coalesce rows.
  1.1689 +  if (src_stride_argb == width * 4 &&
  1.1690 +      dst_stride_argb == width * 4) {
  1.1691 +    width *= height;
  1.1692 +    height = 1;
  1.1693 +    src_stride_argb = dst_stride_argb = 0;
  1.1694 +  }
  1.1695 +#if defined(HAS_ARGBSHADEROW_SSE2)
  1.1696 +  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
  1.1697 +      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
  1.1698 +      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
  1.1699 +    ARGBShadeRow = ARGBShadeRow_SSE2;
  1.1700 +  }
  1.1701 +#elif defined(HAS_ARGBSHADEROW_NEON)
  1.1702 +  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
  1.1703 +    ARGBShadeRow = ARGBShadeRow_NEON;
  1.1704 +  }
  1.1705 +#endif
  1.1706 +
  1.1707 +  for (y = 0; y < height; ++y) {
  1.1708 +    ARGBShadeRow(src_argb, dst_argb, width, value);
  1.1709 +    src_argb += src_stride_argb;
  1.1710 +    dst_argb += dst_stride_argb;
  1.1711 +  }
  1.1712 +  return 0;
  1.1713 +}
  1.1714 +
  1.1715 +// Interpolate 2 ARGB images by specified amount (0 to 255).
  1.1716 +LIBYUV_API
  1.1717 +int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
  1.1718 +                    const uint8* src_argb1, int src_stride_argb1,
  1.1719 +                    uint8* dst_argb, int dst_stride_argb,
  1.1720 +                    int width, int height, int interpolation) {
  1.1721 +  int y;
  1.1722 +  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
  1.1723 +                         ptrdiff_t src_stride, int dst_width,
  1.1724 +                         int source_y_fraction) = InterpolateRow_C;
  1.1725 +  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
  1.1726 +    return -1;
  1.1727 +  }
  1.1728 +  // Negative height means invert the image.
  1.1729 +  if (height < 0) {
  1.1730 +    height = -height;
  1.1731 +    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
  1.1732 +    dst_stride_argb = -dst_stride_argb;
  1.1733 +  }
  1.1734 +  // Coalesce rows.
  1.1735 +  if (src_stride_argb0 == width * 4 &&
  1.1736 +      src_stride_argb1 == width * 4 &&
  1.1737 +      dst_stride_argb == width * 4) {
  1.1738 +    width *= height;
  1.1739 +    height = 1;
  1.1740 +    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  1.1741 +  }
  1.1742 +#if defined(HAS_INTERPOLATEROW_SSE2)
  1.1743 +  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
  1.1744 +    InterpolateRow = InterpolateRow_Any_SSE2;
  1.1745 +    if (IS_ALIGNED(width, 4)) {
  1.1746 +      InterpolateRow = InterpolateRow_Unaligned_SSE2;
  1.1747 +      if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
  1.1748 +          IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
  1.1749 +          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
  1.1750 +        InterpolateRow = InterpolateRow_SSE2;
  1.1751 +      }
  1.1752 +    }
  1.1753 +  }
  1.1754 +#endif
  1.1755 +#if defined(HAS_INTERPOLATEROW_SSSE3)
  1.1756 +  if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
  1.1757 +    InterpolateRow = InterpolateRow_Any_SSSE3;
  1.1758 +    if (IS_ALIGNED(width, 4)) {
  1.1759 +      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
  1.1760 +      if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
  1.1761 +          IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
  1.1762 +          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
  1.1763 +        InterpolateRow = InterpolateRow_SSSE3;
  1.1764 +      }
  1.1765 +    }
  1.1766 +  }
  1.1767 +#endif
  1.1768 +#if defined(HAS_INTERPOLATEROW_AVX2)
  1.1769 +  if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
  1.1770 +    InterpolateRow = InterpolateRow_Any_AVX2;
  1.1771 +    if (IS_ALIGNED(width, 8)) {
  1.1772 +      InterpolateRow = InterpolateRow_AVX2;
  1.1773 +    }
  1.1774 +  }
  1.1775 +#endif
  1.1776 +#if defined(HAS_INTERPOLATEROW_NEON)
  1.1777 +  if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
  1.1778 +    InterpolateRow = InterpolateRow_Any_NEON;
  1.1779 +    if (IS_ALIGNED(width, 4)) {
  1.1780 +      InterpolateRow = InterpolateRow_NEON;
  1.1781 +    }
  1.1782 +  }
  1.1783 +#endif
  1.1784 +#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
  1.1785 +  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && width >= 1 &&
  1.1786 +      IS_ALIGNED(src_argb0, 4) && IS_ALIGNED(src_stride_argb0, 4) &&
  1.1787 +      IS_ALIGNED(src_argb1, 4) && IS_ALIGNED(src_stride_argb1, 4) &&
  1.1788 +      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
  1.1789 +    ScaleARGBFilterRows = InterpolateRow_MIPS_DSPR2;
  1.1790 +  }
  1.1791 +#endif
  1.1792 +
  1.1793 +  for (y = 0; y < height; ++y) {
  1.1794 +    InterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0,
  1.1795 +                   width * 4, interpolation);
  1.1796 +    src_argb0 += src_stride_argb0;
  1.1797 +    src_argb1 += src_stride_argb1;
  1.1798 +    dst_argb += dst_stride_argb;
  1.1799 +  }
  1.1800 +  return 0;
  1.1801 +}
  1.1802 +
  1.1803 +// Shuffle ARGB channel order.  e.g. BGRA to ARGB.
  1.1804 +LIBYUV_API
  1.1805 +int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
  1.1806 +                uint8* dst_argb, int dst_stride_argb,
  1.1807 +                const uint8* shuffler, int width, int height) {
  1.1808 +  int y;
  1.1809 +  void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
  1.1810 +                         const uint8* shuffler, int pix) = ARGBShuffleRow_C;
  1.1811 +  if (!src_bgra || !dst_argb ||
  1.1812 +      width <= 0 || height == 0) {
  1.1813 +    return -1;
  1.1814 +  }
  1.1815 +  // Negative height means invert the image.
  1.1816 +  if (height < 0) {
  1.1817 +    height = -height;
  1.1818 +    src_bgra = src_bgra + (height - 1) * src_stride_bgra;
  1.1819 +    src_stride_bgra = -src_stride_bgra;
  1.1820 +  }
  1.1821 +  // Coalesce rows.
  1.1822 +  if (src_stride_bgra == width * 4 &&
  1.1823 +      dst_stride_argb == width * 4) {
  1.1824 +    width *= height;
  1.1825 +    height = 1;
  1.1826 +    src_stride_bgra = dst_stride_argb = 0;
  1.1827 +  }
  1.1828 +#if defined(HAS_ARGBSHUFFLEROW_SSE2)
  1.1829 +  if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
  1.1830 +    ARGBShuffleRow = ARGBShuffleRow_Any_SSE2;
  1.1831 +    if (IS_ALIGNED(width, 4)) {
  1.1832 +      ARGBShuffleRow = ARGBShuffleRow_SSE2;
  1.1833 +    }
  1.1834 +  }
  1.1835 +#endif
  1.1836 +#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
  1.1837 +  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
  1.1838 +    ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
  1.1839 +    if (IS_ALIGNED(width, 8)) {
  1.1840 +      ARGBShuffleRow = ARGBShuffleRow_Unaligned_SSSE3;
  1.1841 +      if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16) &&
  1.1842 +          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
  1.1843 +        ARGBShuffleRow = ARGBShuffleRow_SSSE3;
  1.1844 +      }
  1.1845 +    }
  1.1846 +  }
  1.1847 +#endif
  1.1848 +#if defined(HAS_ARGBSHUFFLEROW_AVX2)
  1.1849 +  if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
  1.1850 +    ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
  1.1851 +    if (IS_ALIGNED(width, 16)) {
  1.1852 +      ARGBShuffleRow = ARGBShuffleRow_AVX2;
  1.1853 +    }
  1.1854 +  }
  1.1855 +#endif
  1.1856 +#if defined(HAS_ARGBSHUFFLEROW_NEON)
  1.1857 +  if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
  1.1858 +    ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
  1.1859 +    if (IS_ALIGNED(width, 4)) {
  1.1860 +      ARGBShuffleRow = ARGBShuffleRow_NEON;
  1.1861 +    }
  1.1862 +  }
  1.1863 +#endif
  1.1864 +
  1.1865 +  for (y = 0; y < height; ++y) {
  1.1866 +    ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
  1.1867 +    src_bgra += src_stride_bgra;
  1.1868 +    dst_argb += dst_stride_argb;
  1.1869 +  }
  1.1870 +  return 0;
  1.1871 +}
  1.1872 +
// Sobel ARGB effect.
// Shared driver for the Sobel variants: extracts a luma row from ARGB,
// runs SobelX/SobelY over a sliding 3-row window, and lets the supplied
// SobelRow function combine the two gradient rows into the destination.
static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
                        uint8* dst_argb, int dst_stride_argb,
                        int width, int height,
                        void (*SobelRow)(const uint8* src_sobelx,
                                         const uint8* src_sobely,
                                         uint8* dst, int width)) {
  int y;
  void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
                         uint32 selector, int pix) = ARGBToBayerGGRow_C;
  void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
                    uint8* dst_sobely, int width) = SobelYRow_C;
  void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
                    const uint8* src_y2, uint8* dst_sobely, int width) =
      SobelXRow_C;
  const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
  if (!src_argb  || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb  = src_argb  + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // ARGBToBayer used to select G channel from ARGB.
#if defined(HAS_ARGBTOBAYERGGROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
    ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      ARGBToBayerRow = ARGBToBayerGGRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
    ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      ARGBToBayerRow = ARGBToBayerRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBTOBAYERGGROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBToBayerRow = ARGBToBayerGGRow_NEON;
    }
  }
#endif
#if defined(HAS_SOBELYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelYRow = SobelYRow_SSE2;
  }
#endif
#if defined(HAS_SOBELYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelYRow = SobelYRow_NEON;
  }
#endif
#if defined(HAS_SOBELXROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelXRow = SobelXRow_SSE2;
  }
#endif
#if defined(HAS_SOBELXROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelXRow = SobelXRow_NEON;
  }
#endif
  {
    // 3 rows with edges before/after.
    const int kRowSize = (width + kEdge + 15) & ~15;  // Round up to 16 bytes.
    // Layout: [sobelx row][sobely row][kEdge pad][3 luma rows][kEdge pad].
    align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
    uint8* row_sobelx = rows;
    uint8* row_sobely = rows + kRowSize;
    uint8* row_y = rows + kRowSize * 2;

    // Convert first row.
    uint8* row_y0 = row_y + kEdge;
    uint8* row_y1 = row_y0 + kRowSize;
    uint8* row_y2 = row_y1 + kRowSize;
    // Selector 0x0d090501 picks byte offsets 1,5,9,13: the G channel.
    ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width);
    // Replicate edge pixels so the 3x3 filter can read one pixel past ends.
    row_y0[-1] = row_y0[0];
    memset(row_y0 + width, row_y0[width - 1], 16);  // Extrude 16 for valgrind.
    ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width);
    row_y1[-1] = row_y1[0];
    memset(row_y1 + width, row_y1[width - 1], 16);
    memset(row_y2 + width, 0, 16);

    for (y = 0; y < height; ++y) {
      // Convert next row of ARGB to Y.  On the last row, reuse the current
      // source row so the bottom edge is replicated.
      if (y < (height - 1)) {
        src_argb += src_stride_argb;
      }
      ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width);
      row_y2[-1] = row_y2[0];
      row_y2[width] = row_y2[width - 1];

      // Gradients from the 3-row window, then combine via caller's SobelRow.
      SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
      SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
      SobelRow(row_sobelx, row_sobely, dst_argb, width);

      // Cycle thru circular queue of 3 row_y buffers.
      {
        uint8* row_yt = row_y0;
        row_y0 = row_y1;
        row_y1 = row_y2;
        row_y2 = row_yt;
      }

      dst_argb += dst_stride_argb;
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}
  1.1992 +
  1.1993 +// Sobel ARGB effect.
  1.1994 +LIBYUV_API
  1.1995 +int ARGBSobel(const uint8* src_argb, int src_stride_argb,
  1.1996 +              uint8* dst_argb, int dst_stride_argb,
  1.1997 +              int width, int height) {
  1.1998 +  void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
  1.1999 +                   uint8* dst_argb, int width) = SobelRow_C;
  1.2000 +#if defined(HAS_SOBELROW_SSE2)
  1.2001 +  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
  1.2002 +      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
  1.2003 +    SobelRow = SobelRow_SSE2;
  1.2004 +  }
  1.2005 +#endif
  1.2006 +#if defined(HAS_SOBELROW_NEON)
  1.2007 +  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
  1.2008 +    SobelRow = SobelRow_NEON;
  1.2009 +  }
  1.2010 +#endif
  1.2011 +  return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
  1.2012 +                      width, height, SobelRow);
  1.2013 +}
  1.2014 +
  1.2015 +// Sobel ARGB effect with planar output.
  1.2016 +LIBYUV_API
  1.2017 +int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
  1.2018 +                     uint8* dst_y, int dst_stride_y,
  1.2019 +                     int width, int height) {
  1.2020 +  void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
  1.2021 +                          uint8* dst_, int width) = SobelToPlaneRow_C;
  1.2022 +#if defined(HAS_SOBELTOPLANEROW_SSE2)
  1.2023 +  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
  1.2024 +      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
  1.2025 +    SobelToPlaneRow = SobelToPlaneRow_SSE2;
  1.2026 +  }
  1.2027 +#endif
  1.2028 +#if defined(HAS_SOBELTOPLANEROW_NEON)
  1.2029 +  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
  1.2030 +    SobelToPlaneRow = SobelToPlaneRow_NEON;
  1.2031 +  }
  1.2032 +#endif
  1.2033 +  return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y,
  1.2034 +                      width, height, SobelToPlaneRow);
  1.2035 +}
  1.2036 +
  1.2037 +// SobelXY ARGB effect.
  1.2038 +// Similar to Sobel, but also stores Sobel X in R and Sobel Y in B.  G = Sobel.
  1.2039 +LIBYUV_API
  1.2040 +int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
  1.2041 +                uint8* dst_argb, int dst_stride_argb,
  1.2042 +                int width, int height) {
  1.2043 +  void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
  1.2044 +                     uint8* dst_argb, int width) = SobelXYRow_C;
  1.2045 +#if defined(HAS_SOBELXYROW_SSE2)
  1.2046 +  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
  1.2047 +      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
  1.2048 +    SobelXYRow = SobelXYRow_SSE2;
  1.2049 +  }
  1.2050 +#endif
  1.2051 +#if defined(HAS_SOBELXYROW_NEON)
  1.2052 +  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
  1.2053 +    SobelXYRow = SobelXYRow_NEON;
  1.2054 +  }
  1.2055 +#endif
  1.2056 +  return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
  1.2057 +                      width, height, SobelXYRow);
  1.2058 +}
  1.2059 +
  1.2060 +// Apply a 4x4 polynomial to each ARGB pixel.
  1.2061 +LIBYUV_API
  1.2062 +int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
  1.2063 +                   uint8* dst_argb, int dst_stride_argb,
  1.2064 +                   const float* poly,
  1.2065 +                   int width, int height) {
  1.2066 +  int y;
  1.2067 +  void (*ARGBPolynomialRow)(const uint8* src_argb,
  1.2068 +                            uint8* dst_argb, const float* poly,
  1.2069 +                            int width) = ARGBPolynomialRow_C;
  1.2070 +  if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
  1.2071 +    return -1;
  1.2072 +  }
  1.2073 +  // Negative height means invert the image.
  1.2074 +  if (height < 0) {
  1.2075 +    height = -height;
  1.2076 +    src_argb  = src_argb  + (height - 1) * src_stride_argb;
  1.2077 +    src_stride_argb = -src_stride_argb;
  1.2078 +  }
  1.2079 +  // Coalesce rows.
  1.2080 +  if (src_stride_argb == width * 4 &&
  1.2081 +      dst_stride_argb == width * 4) {
  1.2082 +    width *= height;
  1.2083 +    height = 1;
  1.2084 +    src_stride_argb = dst_stride_argb = 0;
  1.2085 +  }
  1.2086 +#if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
  1.2087 +  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
  1.2088 +    ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
  1.2089 +  }
  1.2090 +#endif
  1.2091 +#if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
  1.2092 +  if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
  1.2093 +      IS_ALIGNED(width, 2)) {
  1.2094 +    ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
  1.2095 +  }
  1.2096 +#endif
  1.2097 +
  1.2098 +  for (y = 0; y < height; ++y) {
  1.2099 +    ARGBPolynomialRow(src_argb, dst_argb, poly, width);
  1.2100 +    src_argb += src_stride_argb;
  1.2101 +    dst_argb += dst_stride_argb;
  1.2102 +  }
  1.2103 +  return 0;
  1.2104 +}
  1.2105 +
  1.2106 +// Apply a lumacolortable to each ARGB pixel.
  1.2107 +LIBYUV_API
  1.2108 +int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
  1.2109 +                       uint8* dst_argb, int dst_stride_argb,
  1.2110 +                       const uint8* luma,
  1.2111 +                       int width, int height) {
  1.2112 +  int y;
  1.2113 +  void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb,
  1.2114 +      int width, const uint8* luma, const uint32 lumacoeff) =
  1.2115 +      ARGBLumaColorTableRow_C;
  1.2116 +  if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
  1.2117 +    return -1;
  1.2118 +  }
  1.2119 +  // Negative height means invert the image.
  1.2120 +  if (height < 0) {
  1.2121 +    height = -height;
  1.2122 +    src_argb  = src_argb  + (height - 1) * src_stride_argb;
  1.2123 +    src_stride_argb = -src_stride_argb;
  1.2124 +  }
  1.2125 +  // Coalesce rows.
  1.2126 +  if (src_stride_argb == width * 4 &&
  1.2127 +      dst_stride_argb == width * 4) {
  1.2128 +    width *= height;
  1.2129 +    height = 1;
  1.2130 +    src_stride_argb = dst_stride_argb = 0;
  1.2131 +  }
  1.2132 +#if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
  1.2133 +  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
  1.2134 +    ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
  1.2135 +  }
  1.2136 +#endif
  1.2137 +
  1.2138 +  for (y = 0; y < height; ++y) {
  1.2139 +    ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
  1.2140 +    src_argb += src_stride_argb;
  1.2141 +    dst_argb += dst_stride_argb;
  1.2142 +  }
  1.2143 +  return 0;
  1.2144 +}
  1.2145 +
  1.2146 +// Copy Alpha from one ARGB image to another.
  1.2147 +LIBYUV_API
  1.2148 +int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
  1.2149 +                  uint8* dst_argb, int dst_stride_argb,
  1.2150 +                  int width, int height) {
  1.2151 +  int y;
  1.2152 +  void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) =
  1.2153 +      ARGBCopyAlphaRow_C;
  1.2154 +  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
  1.2155 +    return -1;
  1.2156 +  }
  1.2157 +  // Negative height means invert the image.
  1.2158 +  if (height < 0) {
  1.2159 +    height = -height;
  1.2160 +    src_argb = src_argb + (height - 1) * src_stride_argb;
  1.2161 +    src_stride_argb = -src_stride_argb;
  1.2162 +  }
  1.2163 +  // Coalesce rows.
  1.2164 +  if (src_stride_argb == width * 4 &&
  1.2165 +      dst_stride_argb == width * 4) {
  1.2166 +    width *= height;
  1.2167 +    height = 1;
  1.2168 +    src_stride_argb = dst_stride_argb = 0;
  1.2169 +  }
  1.2170 +#if defined(HAS_ARGBCOPYALPHAROW_SSE2)
  1.2171 +  if (TestCpuFlag(kCpuHasSSE2) &&
  1.2172 +      IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
  1.2173 +      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
  1.2174 +      IS_ALIGNED(width, 8)) {
  1.2175 +    ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
  1.2176 +  }
  1.2177 +#endif
  1.2178 +#if defined(HAS_ARGBCOPYALPHAROW_AVX2)
  1.2179 +  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
  1.2180 +    ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
  1.2181 +  }
  1.2182 +#endif
  1.2183 +
  1.2184 +  for (y = 0; y < height; ++y) {
  1.2185 +    ARGBCopyAlphaRow(src_argb, dst_argb, width);
  1.2186 +    src_argb += src_stride_argb;
  1.2187 +    dst_argb += dst_stride_argb;
  1.2188 +  }
  1.2189 +  return 0;
  1.2190 +}
  1.2191 +
  1.2192 +// Copy a planar Y channel to the alpha channel of a destination ARGB image.
  1.2193 +LIBYUV_API
  1.2194 +int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
  1.2195 +                     uint8* dst_argb, int dst_stride_argb,
  1.2196 +                     int width, int height) {
  1.2197 +  int y;
  1.2198 +  void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
  1.2199 +      ARGBCopyYToAlphaRow_C;
  1.2200 +  if (!src_y || !dst_argb || width <= 0 || height == 0) {
  1.2201 +    return -1;
  1.2202 +  }
  1.2203 +  // Negative height means invert the image.
  1.2204 +  if (height < 0) {
  1.2205 +    height = -height;
  1.2206 +    src_y = src_y + (height - 1) * src_stride_y;
  1.2207 +    src_stride_y = -src_stride_y;
  1.2208 +  }
  1.2209 +  // Coalesce rows.
  1.2210 +  if (src_stride_y == width &&
  1.2211 +      dst_stride_argb == width * 4) {
  1.2212 +    width *= height;
  1.2213 +    height = 1;
  1.2214 +    src_stride_y = dst_stride_argb = 0;
  1.2215 +  }
  1.2216 +#if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
  1.2217 +  if (TestCpuFlag(kCpuHasSSE2) &&
  1.2218 +      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
  1.2219 +      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
  1.2220 +      IS_ALIGNED(width, 8)) {
  1.2221 +    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
  1.2222 +  }
  1.2223 +#endif
  1.2224 +#if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
  1.2225 +  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
  1.2226 +    ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
  1.2227 +  }
  1.2228 +#endif
  1.2229 +
  1.2230 +  for (y = 0; y < height; ++y) {
  1.2231 +    ARGBCopyYToAlphaRow(src_y, dst_argb, width);
  1.2232 +    src_y += src_stride_y;
  1.2233 +    dst_argb += dst_stride_argb;
  1.2234 +  }
  1.2235 +  return 0;
  1.2236 +}
  1.2237 +
  1.2238 +#ifdef __cplusplus
  1.2239 +}  // extern "C"
  1.2240 +}  // namespace libyuv
  1.2241 +#endif

mercurial