media/libyuv/source/row_common.cc

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/media/libyuv/source/row_common.cc	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,2247 @@
     1.4 +/*
     1.5 + *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
     1.6 + *
     1.7 + *  Use of this source code is governed by a BSD-style license
     1.8 + *  that can be found in the LICENSE file in the root of the source
     1.9 + *  tree. An additional intellectual property rights grant can be found
    1.10 + *  in the file PATENTS. All contributing project authors may
    1.11 + *  be found in the AUTHORS file in the root of the source tree.
    1.12 + */
    1.13 +
    1.14 +#include "libyuv/row.h"
    1.15 +
    1.16 +#include <string.h>  // For memcpy and memset.
    1.17 +
    1.18 +#include "libyuv/basic_types.h"
    1.19 +
    1.20 +#ifdef __cplusplus
    1.21 +namespace libyuv {
    1.22 +extern "C" {
    1.23 +#endif
    1.24 +
    1.25 +// llvm x86 is poor at ternary operator, so use branchless min/max.
    1.26 +
    1.27 +#define USE_BRANCHLESS 1
    1.28 +#if USE_BRANCHLESS
    1.29 +static __inline int32 clamp0(int32 v) {
    1.30 +  return ((-(v) >> 31) & (v));
    1.31 +}
    1.32 +
    1.33 +static __inline int32 clamp255(int32 v) {
    1.34 +  return (((255 - (v)) >> 31) | (v)) & 255;
    1.35 +}
    1.36 +
    1.37 +static __inline uint32 Clamp(int32 val) {
    1.38 +  int v = clamp0(val);
    1.39 +  return (uint32)(clamp255(v));
    1.40 +}
    1.41 +
    1.42 +static __inline uint32 Abs(int32 v) {
    1.43 +  int m = v >> 31;
    1.44 +  return (v + m) ^ m;
    1.45 +}
    1.46 +#else  // USE_BRANCHLESS
    1.47 +static __inline int32 clamp0(int32 v) {
    1.48 +  return (v < 0) ? 0 : v;
    1.49 +}
    1.50 +
    1.51 +static __inline int32 clamp255(int32 v) {
    1.52 +  return (v > 255) ? 255 : v;
    1.53 +}
    1.54 +
    1.55 +static __inline uint32 Clamp(int32 val) {
    1.56 +  int v = clamp0(val);
    1.57 +  return (uint32)(clamp255(v));
    1.58 +}
    1.59 +
    1.60 +static __inline uint32 Abs(int32 v) {
    1.61 +  return (v < 0) ? -v : v;
    1.62 +}
    1.63 +#endif  // USE_BRANCHLESS
    1.64 +
    1.65 +#ifdef LIBYUV_LITTLE_ENDIAN
    1.66 +#define WRITEWORD(p, v) *(uint32*)(p) = v
    1.67 +#else
    1.68 +static inline void WRITEWORD(uint8* p, uint32 v) {
    1.69 +  p[0] = (uint8)(v & 255);
    1.70 +  p[1] = (uint8)((v >> 8) & 255);
    1.71 +  p[2] = (uint8)((v >> 16) & 255);
    1.72 +  p[3] = (uint8)((v >> 24) & 255);
    1.73 +}
    1.74 +#endif
    1.75 +
    1.76 +void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) {
    1.77 +  int x;
    1.78 +  for (x = 0; x < width; ++x) {
    1.79 +    uint8 b = src_rgb24[0];
    1.80 +    uint8 g = src_rgb24[1];
    1.81 +    uint8 r = src_rgb24[2];
    1.82 +    dst_argb[0] = b;
    1.83 +    dst_argb[1] = g;
    1.84 +    dst_argb[2] = r;
    1.85 +    dst_argb[3] = 255u;
    1.86 +    dst_argb += 4;
    1.87 +    src_rgb24 += 3;
    1.88 +  }
    1.89 +}
    1.90 +
    1.91 +void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
    1.92 +  int x;
    1.93 +  for (x = 0; x < width; ++x) {
    1.94 +    uint8 r = src_raw[0];
    1.95 +    uint8 g = src_raw[1];
    1.96 +    uint8 b = src_raw[2];
    1.97 +    dst_argb[0] = b;
    1.98 +    dst_argb[1] = g;
    1.99 +    dst_argb[2] = r;
   1.100 +    dst_argb[3] = 255u;
   1.101 +    dst_argb += 4;
   1.102 +    src_raw += 3;
   1.103 +  }
   1.104 +}
   1.105 +
   1.106 +void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
   1.107 +  int x;
   1.108 +  for (x = 0; x < width; ++x) {
   1.109 +    uint8 b = src_rgb565[0] & 0x1f;
   1.110 +    uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
   1.111 +    uint8 r = src_rgb565[1] >> 3;
   1.112 +    dst_argb[0] = (b << 3) | (b >> 2);
   1.113 +    dst_argb[1] = (g << 2) | (g >> 4);
   1.114 +    dst_argb[2] = (r << 3) | (r >> 2);
   1.115 +    dst_argb[3] = 255u;
   1.116 +    dst_argb += 4;
   1.117 +    src_rgb565 += 2;
   1.118 +  }
   1.119 +}
   1.120 +
   1.121 +void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb,
   1.122 +                         int width) {
   1.123 +  int x;
   1.124 +  for (x = 0; x < width; ++x) {
   1.125 +    uint8 b = src_argb1555[0] & 0x1f;
   1.126 +    uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
   1.127 +    uint8 r = (src_argb1555[1] & 0x7c) >> 2;
   1.128 +    uint8 a = src_argb1555[1] >> 7;
   1.129 +    dst_argb[0] = (b << 3) | (b >> 2);
   1.130 +    dst_argb[1] = (g << 3) | (g >> 2);
   1.131 +    dst_argb[2] = (r << 3) | (r >> 2);
   1.132 +    dst_argb[3] = -a;
   1.133 +    dst_argb += 4;
   1.134 +    src_argb1555 += 2;
   1.135 +  }
   1.136 +}
   1.137 +
   1.138 +void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb,
   1.139 +                         int width) {
   1.140 +  int x;
   1.141 +  for (x = 0; x < width; ++x) {
   1.142 +    uint8 b = src_argb4444[0] & 0x0f;
   1.143 +    uint8 g = src_argb4444[0] >> 4;
   1.144 +    uint8 r = src_argb4444[1] & 0x0f;
   1.145 +    uint8 a = src_argb4444[1] >> 4;
   1.146 +    dst_argb[0] = (b << 4) | b;
   1.147 +    dst_argb[1] = (g << 4) | g;
   1.148 +    dst_argb[2] = (r << 4) | r;
   1.149 +    dst_argb[3] = (a << 4) | a;
   1.150 +    dst_argb += 4;
   1.151 +    src_argb4444 += 2;
   1.152 +  }
   1.153 +}
   1.154 +
   1.155 +void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
   1.156 +  int x;
   1.157 +  for (x = 0; x < width; ++x) {
   1.158 +    uint8 b = src_argb[0];
   1.159 +    uint8 g = src_argb[1];
   1.160 +    uint8 r = src_argb[2];
   1.161 +    dst_rgb[0] = b;
   1.162 +    dst_rgb[1] = g;
   1.163 +    dst_rgb[2] = r;
   1.164 +    dst_rgb += 3;
   1.165 +    src_argb += 4;
   1.166 +  }
   1.167 +}
   1.168 +
   1.169 +void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
   1.170 +  int x;
   1.171 +  for (x = 0; x < width; ++x) {
   1.172 +    uint8 b = src_argb[0];
   1.173 +    uint8 g = src_argb[1];
   1.174 +    uint8 r = src_argb[2];
   1.175 +    dst_rgb[0] = r;
   1.176 +    dst_rgb[1] = g;
   1.177 +    dst_rgb[2] = b;
   1.178 +    dst_rgb += 3;
   1.179 +    src_argb += 4;
   1.180 +  }
   1.181 +}
   1.182 +
   1.183 +void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
   1.184 +  int x;
   1.185 +  for (x = 0; x < width - 1; x += 2) {
   1.186 +    uint8 b0 = src_argb[0] >> 3;
   1.187 +    uint8 g0 = src_argb[1] >> 2;
   1.188 +    uint8 r0 = src_argb[2] >> 3;
   1.189 +    uint8 b1 = src_argb[4] >> 3;
   1.190 +    uint8 g1 = src_argb[5] >> 2;
   1.191 +    uint8 r1 = src_argb[6] >> 3;
   1.192 +    WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) |
   1.193 +              (b1 << 16) | (g1 << 21) | (r1 << 27));
   1.194 +    dst_rgb += 4;
   1.195 +    src_argb += 8;
   1.196 +  }
   1.197 +  if (width & 1) {
   1.198 +    uint8 b0 = src_argb[0] >> 3;
   1.199 +    uint8 g0 = src_argb[1] >> 2;
   1.200 +    uint8 r0 = src_argb[2] >> 3;
   1.201 +    *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
   1.202 +  }
   1.203 +}
   1.204 +
   1.205 +void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
   1.206 +  int x;
   1.207 +  for (x = 0; x < width - 1; x += 2) {
   1.208 +    uint8 b0 = src_argb[0] >> 3;
   1.209 +    uint8 g0 = src_argb[1] >> 3;
   1.210 +    uint8 r0 = src_argb[2] >> 3;
   1.211 +    uint8 a0 = src_argb[3] >> 7;
   1.212 +    uint8 b1 = src_argb[4] >> 3;
   1.213 +    uint8 g1 = src_argb[5] >> 3;
   1.214 +    uint8 r1 = src_argb[6] >> 3;
   1.215 +    uint8 a1 = src_argb[7] >> 7;
   1.216 +    *(uint32*)(dst_rgb) =
   1.217 +        b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
   1.218 +        (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31);
   1.219 +    dst_rgb += 4;
   1.220 +    src_argb += 8;
   1.221 +  }
   1.222 +  if (width & 1) {
   1.223 +    uint8 b0 = src_argb[0] >> 3;
   1.224 +    uint8 g0 = src_argb[1] >> 3;
   1.225 +    uint8 r0 = src_argb[2] >> 3;
   1.226 +    uint8 a0 = src_argb[3] >> 7;
   1.227 +    *(uint16*)(dst_rgb) =
   1.228 +        b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
   1.229 +  }
   1.230 +}
   1.231 +
   1.232 +void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
   1.233 +  int x;
   1.234 +  for (x = 0; x < width - 1; x += 2) {
   1.235 +    uint8 b0 = src_argb[0] >> 4;
   1.236 +    uint8 g0 = src_argb[1] >> 4;
   1.237 +    uint8 r0 = src_argb[2] >> 4;
   1.238 +    uint8 a0 = src_argb[3] >> 4;
   1.239 +    uint8 b1 = src_argb[4] >> 4;
   1.240 +    uint8 g1 = src_argb[5] >> 4;
   1.241 +    uint8 r1 = src_argb[6] >> 4;
   1.242 +    uint8 a1 = src_argb[7] >> 4;
   1.243 +    *(uint32*)(dst_rgb) =
   1.244 +        b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
   1.245 +        (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28);
   1.246 +    dst_rgb += 4;
   1.247 +    src_argb += 8;
   1.248 +  }
   1.249 +  if (width & 1) {
   1.250 +    uint8 b0 = src_argb[0] >> 4;
   1.251 +    uint8 g0 = src_argb[1] >> 4;
   1.252 +    uint8 r0 = src_argb[2] >> 4;
   1.253 +    uint8 a0 = src_argb[3] >> 4;
   1.254 +    *(uint16*)(dst_rgb) =
   1.255 +        b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
   1.256 +  }
   1.257 +}
   1.258 +
   1.259 +static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
   1.260 +  return (66 * r + 129 * g +  25 * b + 0x1080) >> 8;
   1.261 +}
   1.262 +
   1.263 +static __inline int RGBToU(uint8 r, uint8 g, uint8 b) {
   1.264 +  return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
   1.265 +}
   1.266 +static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
   1.267 +  return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
   1.268 +}
   1.269 +
   1.270 +#define MAKEROWY(NAME, R, G, B, BPP) \
   1.271 +void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) {       \
   1.272 +  int x;                                                                       \
   1.273 +  for (x = 0; x < width; ++x) {                                                \
   1.274 +    dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]);               \
   1.275 +    src_argb0 += BPP;                                                          \
   1.276 +    dst_y += 1;                                                                \
   1.277 +  }                                                                            \
   1.278 +}                                                                              \
   1.279 +void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb,              \
   1.280 +                       uint8* dst_u, uint8* dst_v, int width) {                \
   1.281 +  const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                           \
   1.282 +  int x;                                                                       \
   1.283 +  for (x = 0; x < width - 1; x += 2) {                                         \
   1.284 +    uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] +                              \
   1.285 +               src_rgb1[B] + src_rgb1[B + BPP]) >> 2;                          \
   1.286 +    uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] +                              \
   1.287 +               src_rgb1[G] + src_rgb1[G + BPP]) >> 2;                          \
   1.288 +    uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] +                              \
   1.289 +               src_rgb1[R] + src_rgb1[R + BPP]) >> 2;                          \
   1.290 +    dst_u[0] = RGBToU(ar, ag, ab);                                             \
   1.291 +    dst_v[0] = RGBToV(ar, ag, ab);                                             \
   1.292 +    src_rgb0 += BPP * 2;                                                       \
   1.293 +    src_rgb1 += BPP * 2;                                                       \
   1.294 +    dst_u += 1;                                                                \
   1.295 +    dst_v += 1;                                                                \
   1.296 +  }                                                                            \
   1.297 +  if (width & 1) {                                                             \
   1.298 +    uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1;                               \
   1.299 +    uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1;                               \
   1.300 +    uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1;                               \
   1.301 +    dst_u[0] = RGBToU(ar, ag, ab);                                             \
   1.302 +    dst_v[0] = RGBToV(ar, ag, ab);                                             \
   1.303 +  }                                                                            \
   1.304 +}
   1.305 +
   1.306 +MAKEROWY(ARGB, 2, 1, 0, 4)
   1.307 +MAKEROWY(BGRA, 1, 2, 3, 4)
   1.308 +MAKEROWY(ABGR, 0, 1, 2, 4)
   1.309 +MAKEROWY(RGBA, 3, 2, 1, 4)
   1.310 +MAKEROWY(RGB24, 2, 1, 0, 3)
   1.311 +MAKEROWY(RAW, 0, 1, 2, 3)
   1.312 +#undef MAKEROWY
   1.313 +
   1.314 +// JPeg uses a variation on BT.601-1 full range
   1.315 +// y =  0.29900 * r + 0.58700 * g + 0.11400 * b
   1.316 +// u = -0.16874 * r - 0.33126 * g + 0.50000 * b  + center
   1.317 +// v =  0.50000 * r - 0.41869 * g - 0.08131 * b  + center
   1.318 +// BT.601 Mpeg range uses:
   1.319 +// b 0.1016 * 255 = 25.908 = 25
   1.320 +// g 0.5078 * 255 = 129.489 = 129
   1.321 +// r 0.2578 * 255 = 65.739 = 66
   1.322 +// JPeg 8 bit Y (not used):
   1.323 +// b 0.11400 * 256 = 29.184 = 29
   1.324 +// g 0.58700 * 256 = 150.272 = 150
   1.325 +// r 0.29900 * 256 = 76.544 = 77
   1.326 +// JPeg 7 bit Y:
   1.327 +// b 0.11400 * 128 = 14.592 = 15
   1.328 +// g 0.58700 * 128 = 75.136 = 75
   1.329 +// r 0.29900 * 128 = 38.272 = 38
   1.330 +// JPeg 8 bit U:
   1.331 +// b  0.50000 * 255 = 127.5 = 127
   1.332 +// g -0.33126 * 255 = -84.4713 = -84
   1.333 +// r -0.16874 * 255 = -43.0287 = -43
   1.334 +// JPeg 8 bit V:
   1.335 +// b -0.08131 * 255 = -20.73405 = -20
   1.336 +// g -0.41869 * 255 = -106.76595 = -107
   1.337 +// r  0.50000 * 255 = 127.5 = 127
   1.338 +
   1.339 +static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
   1.340 +  return (38 * r + 75 * g +  15 * b + 64) >> 7;
   1.341 +}
   1.342 +
   1.343 +static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) {
   1.344 +  return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
   1.345 +}
   1.346 +static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) {
   1.347 +  return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
   1.348 +}
   1.349 +
   1.350 +#define AVGB(a, b) (((a) + (b) + 1) >> 1)
   1.351 +
   1.352 +#define MAKEROWYJ(NAME, R, G, B, BPP) \
   1.353 +void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) {      \
   1.354 +  int x;                                                                       \
   1.355 +  for (x = 0; x < width; ++x) {                                                \
   1.356 +    dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]);              \
   1.357 +    src_argb0 += BPP;                                                          \
   1.358 +    dst_y += 1;                                                                \
   1.359 +  }                                                                            \
   1.360 +}                                                                              \
   1.361 +void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb,             \
   1.362 +                        uint8* dst_u, uint8* dst_v, int width) {               \
   1.363 +  const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                           \
   1.364 +  int x;                                                                       \
   1.365 +  for (x = 0; x < width - 1; x += 2) {                                         \
   1.366 +    uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]),                            \
   1.367 +                    AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP]));               \
   1.368 +    uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]),                            \
   1.369 +                    AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP]));               \
   1.370 +    uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]),                            \
   1.371 +                    AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP]));               \
   1.372 +    dst_u[0] = RGBToUJ(ar, ag, ab);                                            \
   1.373 +    dst_v[0] = RGBToVJ(ar, ag, ab);                                            \
   1.374 +    src_rgb0 += BPP * 2;                                                       \
   1.375 +    src_rgb1 += BPP * 2;                                                       \
   1.376 +    dst_u += 1;                                                                \
   1.377 +    dst_v += 1;                                                                \
   1.378 +  }                                                                            \
   1.379 +  if (width & 1) {                                                             \
   1.380 +    uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]);                                 \
   1.381 +    uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]);                                 \
   1.382 +    uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]);                                 \
   1.383 +    dst_u[0] = RGBToUJ(ar, ag, ab);                                            \
   1.384 +    dst_v[0] = RGBToVJ(ar, ag, ab);                                            \
   1.385 +  }                                                                            \
   1.386 +}
   1.387 +
   1.388 +MAKEROWYJ(ARGB, 2, 1, 0, 4)
   1.389 +#undef MAKEROWYJ
   1.390 +
   1.391 +void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
   1.392 +  int x;
   1.393 +  for (x = 0; x < width; ++x) {
   1.394 +    uint8 b = src_rgb565[0] & 0x1f;
   1.395 +    uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
   1.396 +    uint8 r = src_rgb565[1] >> 3;
   1.397 +    b = (b << 3) | (b >> 2);
   1.398 +    g = (g << 2) | (g >> 4);
   1.399 +    r = (r << 3) | (r >> 2);
   1.400 +    dst_y[0] = RGBToY(r, g, b);
   1.401 +    src_rgb565 += 2;
   1.402 +    dst_y += 1;
   1.403 +  }
   1.404 +}
   1.405 +
   1.406 +void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) {
   1.407 +  int x;
   1.408 +  for (x = 0; x < width; ++x) {
   1.409 +    uint8 b = src_argb1555[0] & 0x1f;
   1.410 +    uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
   1.411 +    uint8 r = (src_argb1555[1] & 0x7c) >> 2;
   1.412 +    b = (b << 3) | (b >> 2);
   1.413 +    g = (g << 3) | (g >> 2);
   1.414 +    r = (r << 3) | (r >> 2);
   1.415 +    dst_y[0] = RGBToY(r, g, b);
   1.416 +    src_argb1555 += 2;
   1.417 +    dst_y += 1;
   1.418 +  }
   1.419 +}
   1.420 +
   1.421 +void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {
   1.422 +  int x;
   1.423 +  for (x = 0; x < width; ++x) {
   1.424 +    uint8 b = src_argb4444[0] & 0x0f;
   1.425 +    uint8 g = src_argb4444[0] >> 4;
   1.426 +    uint8 r = src_argb4444[1] & 0x0f;
   1.427 +    b = (b << 4) | b;
   1.428 +    g = (g << 4) | g;
   1.429 +    r = (r << 4) | r;
   1.430 +    dst_y[0] = RGBToY(r, g, b);
   1.431 +    src_argb4444 += 2;
   1.432 +    dst_y += 1;
   1.433 +  }
   1.434 +}
   1.435 +
   1.436 +void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
   1.437 +                     uint8* dst_u, uint8* dst_v, int width) {
   1.438 +  const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565;
   1.439 +  int x;
   1.440 +  for (x = 0; x < width - 1; x += 2) {
   1.441 +    uint8 b0 = src_rgb565[0] & 0x1f;
   1.442 +    uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
   1.443 +    uint8 r0 = src_rgb565[1] >> 3;
   1.444 +    uint8 b1 = src_rgb565[2] & 0x1f;
   1.445 +    uint8 g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
   1.446 +    uint8 r1 = src_rgb565[3] >> 3;
   1.447 +    uint8 b2 = next_rgb565[0] & 0x1f;
   1.448 +    uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
   1.449 +    uint8 r2 = next_rgb565[1] >> 3;
   1.450 +    uint8 b3 = next_rgb565[2] & 0x1f;
   1.451 +    uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
   1.452 +    uint8 r3 = next_rgb565[3] >> 3;
   1.453 +    uint8 b = (b0 + b1 + b2 + b3);  // 565 * 4 = 787.
   1.454 +    uint8 g = (g0 + g1 + g2 + g3);
   1.455 +    uint8 r = (r0 + r1 + r2 + r3);
   1.456 +    b = (b << 1) | (b >> 6);  // 787 -> 888.
   1.457 +    r = (r << 1) | (r >> 6);
   1.458 +    dst_u[0] = RGBToU(r, g, b);
   1.459 +    dst_v[0] = RGBToV(r, g, b);
   1.460 +    src_rgb565 += 4;
   1.461 +    next_rgb565 += 4;
   1.462 +    dst_u += 1;
   1.463 +    dst_v += 1;
   1.464 +  }
   1.465 +  if (width & 1) {
   1.466 +    uint8 b0 = src_rgb565[0] & 0x1f;
   1.467 +    uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
   1.468 +    uint8 r0 = src_rgb565[1] >> 3;
   1.469 +    uint8 b2 = next_rgb565[0] & 0x1f;
   1.470 +    uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
   1.471 +    uint8 r2 = next_rgb565[1] >> 3;
   1.472 +    uint8 b = (b0 + b2);  // 565 * 2 = 676.
   1.473 +    uint8 g = (g0 + g2);
   1.474 +    uint8 r = (r0 + r2);
   1.475 +    b = (b << 2) | (b >> 4);  // 676 -> 888
   1.476 +    g = (g << 1) | (g >> 6);
   1.477 +    r = (r << 2) | (r >> 4);
   1.478 +    dst_u[0] = RGBToU(r, g, b);
   1.479 +    dst_v[0] = RGBToV(r, g, b);
   1.480 +  }
   1.481 +}
   1.482 +
   1.483 +void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
   1.484 +                       uint8* dst_u, uint8* dst_v, int width) {
   1.485 +  const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555;
   1.486 +  int x;
   1.487 +  for (x = 0; x < width - 1; x += 2) {
   1.488 +    uint8 b0 = src_argb1555[0] & 0x1f;
   1.489 +    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
   1.490 +    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
   1.491 +    uint8 b1 = src_argb1555[2] & 0x1f;
   1.492 +    uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
   1.493 +    uint8 r1 = (src_argb1555[3] & 0x7c) >> 2;
   1.494 +    uint8 b2 = next_argb1555[0] & 0x1f;
   1.495 +    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
   1.496 +    uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
   1.497 +    uint8 b3 = next_argb1555[2] & 0x1f;
   1.498 +    uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
   1.499 +    uint8 r3 = (next_argb1555[3] & 0x7c) >> 2;
   1.500 +    uint8 b = (b0 + b1 + b2 + b3);  // 555 * 4 = 777.
   1.501 +    uint8 g = (g0 + g1 + g2 + g3);
   1.502 +    uint8 r = (r0 + r1 + r2 + r3);
   1.503 +    b = (b << 1) | (b >> 6);  // 777 -> 888.
   1.504 +    g = (g << 1) | (g >> 6);
   1.505 +    r = (r << 1) | (r >> 6);
   1.506 +    dst_u[0] = RGBToU(r, g, b);
   1.507 +    dst_v[0] = RGBToV(r, g, b);
   1.508 +    src_argb1555 += 4;
   1.509 +    next_argb1555 += 4;
   1.510 +    dst_u += 1;
   1.511 +    dst_v += 1;
   1.512 +  }
   1.513 +  if (width & 1) {
   1.514 +    uint8 b0 = src_argb1555[0] & 0x1f;
   1.515 +    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
   1.516 +    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
   1.517 +    uint8 b2 = next_argb1555[0] & 0x1f;
   1.518 +    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
   1.519 +    uint8 r2 = next_argb1555[1] >> 3;
   1.520 +    uint8 b = (b0 + b2);  // 555 * 2 = 666.
   1.521 +    uint8 g = (g0 + g2);
   1.522 +    uint8 r = (r0 + r2);
   1.523 +    b = (b << 2) | (b >> 4);  // 666 -> 888.
   1.524 +    g = (g << 2) | (g >> 4);
   1.525 +    r = (r << 2) | (r >> 4);
   1.526 +    dst_u[0] = RGBToU(r, g, b);
   1.527 +    dst_v[0] = RGBToV(r, g, b);
   1.528 +  }
   1.529 +}
   1.530 +
   1.531 +void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
   1.532 +                       uint8* dst_u, uint8* dst_v, int width) {
   1.533 +  const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444;
   1.534 +  int x;
   1.535 +  for (x = 0; x < width - 1; x += 2) {
   1.536 +    uint8 b0 = src_argb4444[0] & 0x0f;
   1.537 +    uint8 g0 = src_argb4444[0] >> 4;
   1.538 +    uint8 r0 = src_argb4444[1] & 0x0f;
   1.539 +    uint8 b1 = src_argb4444[2] & 0x0f;
   1.540 +    uint8 g1 = src_argb4444[2] >> 4;
   1.541 +    uint8 r1 = src_argb4444[3] & 0x0f;
   1.542 +    uint8 b2 = next_argb4444[0] & 0x0f;
   1.543 +    uint8 g2 = next_argb4444[0] >> 4;
   1.544 +    uint8 r2 = next_argb4444[1] & 0x0f;
   1.545 +    uint8 b3 = next_argb4444[2] & 0x0f;
   1.546 +    uint8 g3 = next_argb4444[2] >> 4;
   1.547 +    uint8 r3 = next_argb4444[3] & 0x0f;
   1.548 +    uint8 b = (b0 + b1 + b2 + b3);  // 444 * 4 = 666.
   1.549 +    uint8 g = (g0 + g1 + g2 + g3);
   1.550 +    uint8 r = (r0 + r1 + r2 + r3);
   1.551 +    b = (b << 2) | (b >> 4);  // 666 -> 888.
   1.552 +    g = (g << 2) | (g >> 4);
   1.553 +    r = (r << 2) | (r >> 4);
   1.554 +    dst_u[0] = RGBToU(r, g, b);
   1.555 +    dst_v[0] = RGBToV(r, g, b);
   1.556 +    src_argb4444 += 4;
   1.557 +    next_argb4444 += 4;
   1.558 +    dst_u += 1;
   1.559 +    dst_v += 1;
   1.560 +  }
   1.561 +  if (width & 1) {
   1.562 +    uint8 b0 = src_argb4444[0] & 0x0f;
   1.563 +    uint8 g0 = src_argb4444[0] >> 4;
   1.564 +    uint8 r0 = src_argb4444[1] & 0x0f;
   1.565 +    uint8 b2 = next_argb4444[0] & 0x0f;
   1.566 +    uint8 g2 = next_argb4444[0] >> 4;
   1.567 +    uint8 r2 = next_argb4444[1] & 0x0f;
   1.568 +    uint8 b = (b0 + b2);  // 444 * 2 = 555.
   1.569 +    uint8 g = (g0 + g2);
   1.570 +    uint8 r = (r0 + r2);
   1.571 +    b = (b << 3) | (b >> 2);  // 555 -> 888.
   1.572 +    g = (g << 3) | (g >> 2);
   1.573 +    r = (r << 3) | (r >> 2);
   1.574 +    dst_u[0] = RGBToU(r, g, b);
   1.575 +    dst_v[0] = RGBToV(r, g, b);
   1.576 +  }
   1.577 +}
   1.578 +
   1.579 +void ARGBToUV444Row_C(const uint8* src_argb,
   1.580 +                      uint8* dst_u, uint8* dst_v, int width) {
   1.581 +  int x;
   1.582 +  for (x = 0; x < width; ++x) {
   1.583 +    uint8 ab = src_argb[0];
   1.584 +    uint8 ag = src_argb[1];
   1.585 +    uint8 ar = src_argb[2];
   1.586 +    dst_u[0] = RGBToU(ar, ag, ab);
   1.587 +    dst_v[0] = RGBToV(ar, ag, ab);
   1.588 +    src_argb += 4;
   1.589 +    dst_u += 1;
   1.590 +    dst_v += 1;
   1.591 +  }
   1.592 +}
   1.593 +
   1.594 +void ARGBToUV422Row_C(const uint8* src_argb,
   1.595 +                      uint8* dst_u, uint8* dst_v, int width) {
   1.596 +  int x;
   1.597 +  for (x = 0; x < width - 1; x += 2) {
   1.598 +    uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
   1.599 +    uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
   1.600 +    uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
   1.601 +    dst_u[0] = RGBToU(ar, ag, ab);
   1.602 +    dst_v[0] = RGBToV(ar, ag, ab);
   1.603 +    src_argb += 8;
   1.604 +    dst_u += 1;
   1.605 +    dst_v += 1;
   1.606 +  }
   1.607 +  if (width & 1) {
   1.608 +    uint8 ab = src_argb[0];
   1.609 +    uint8 ag = src_argb[1];
   1.610 +    uint8 ar = src_argb[2];
   1.611 +    dst_u[0] = RGBToU(ar, ag, ab);
   1.612 +    dst_v[0] = RGBToV(ar, ag, ab);
   1.613 +  }
   1.614 +}
   1.615 +
   1.616 +void ARGBToUV411Row_C(const uint8* src_argb,
   1.617 +                      uint8* dst_u, uint8* dst_v, int width) {
   1.618 +  int x;
   1.619 +  for (x = 0; x < width - 3; x += 4) {
   1.620 +    uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2;
   1.621 +    uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2;
   1.622 +    uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2;
   1.623 +    dst_u[0] = RGBToU(ar, ag, ab);
   1.624 +    dst_v[0] = RGBToV(ar, ag, ab);
   1.625 +    src_argb += 16;
   1.626 +    dst_u += 1;
   1.627 +    dst_v += 1;
   1.628 +  }
   1.629 +  if ((width & 3) == 3) {
   1.630 +    uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8]) / 3;
   1.631 +    uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9]) / 3;
   1.632 +    uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10]) / 3;
   1.633 +    dst_u[0] = RGBToU(ar, ag, ab);
   1.634 +    dst_v[0] = RGBToV(ar, ag, ab);
   1.635 +  } else if ((width & 3) == 2) {
   1.636 +    uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
   1.637 +    uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
   1.638 +    uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
   1.639 +    dst_u[0] = RGBToU(ar, ag, ab);
   1.640 +    dst_v[0] = RGBToV(ar, ag, ab);
   1.641 +  } else if ((width & 3) == 1) {
   1.642 +    uint8 ab = src_argb[0];
   1.643 +    uint8 ag = src_argb[1];
   1.644 +    uint8 ar = src_argb[2];
   1.645 +    dst_u[0] = RGBToU(ar, ag, ab);
   1.646 +    dst_v[0] = RGBToV(ar, ag, ab);
   1.647 +  }
   1.648 +}
   1.649 +
   1.650 +void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
   1.651 +  int x;
   1.652 +  for (x = 0; x < width; ++x) {
   1.653 +    uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
   1.654 +    dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
   1.655 +    dst_argb[3] = src_argb[3];
   1.656 +    dst_argb += 4;
   1.657 +    src_argb += 4;
   1.658 +  }
   1.659 +}
   1.660 +
   1.661 +// Convert a row of image to Sepia tone.
   1.662 +void ARGBSepiaRow_C(uint8* dst_argb, int width) {
   1.663 +  int x;
   1.664 +  for (x = 0; x < width; ++x) {
   1.665 +    int b = dst_argb[0];
   1.666 +    int g = dst_argb[1];
   1.667 +    int r = dst_argb[2];
   1.668 +    int sb = (b * 17 + g * 68 + r * 35) >> 7;
   1.669 +    int sg = (b * 22 + g * 88 + r * 45) >> 7;
   1.670 +    int sr = (b * 24 + g * 98 + r * 50) >> 7;
   1.671 +    // b does not over flow. a is preserved from original.
   1.672 +    dst_argb[0] = sb;
   1.673 +    dst_argb[1] = clamp255(sg);
   1.674 +    dst_argb[2] = clamp255(sr);
   1.675 +    dst_argb += 4;
   1.676 +  }
   1.677 +}
   1.678 +
   1.679 +// Apply color matrix to a row of image. Matrix is signed.
   1.680 +// TODO(fbarchard): Consider adding rounding (+32).
   1.681 +void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
   1.682 +                          const int8* matrix_argb, int width) {
   1.683 +  int x;
   1.684 +  for (x = 0; x < width; ++x) {
   1.685 +    int b = src_argb[0];
   1.686 +    int g = src_argb[1];
   1.687 +    int r = src_argb[2];
   1.688 +    int a = src_argb[3];
   1.689 +    int sb = (b * matrix_argb[0] + g * matrix_argb[1] +
   1.690 +              r * matrix_argb[2] + a * matrix_argb[3]) >> 6;
   1.691 +    int sg = (b * matrix_argb[4] + g * matrix_argb[5] +
   1.692 +              r * matrix_argb[6] + a * matrix_argb[7]) >> 6;
   1.693 +    int sr = (b * matrix_argb[8] + g * matrix_argb[9] +
   1.694 +              r * matrix_argb[10] + a * matrix_argb[11]) >> 6;
   1.695 +    int sa = (b * matrix_argb[12] + g * matrix_argb[13] +
   1.696 +              r * matrix_argb[14] + a * matrix_argb[15]) >> 6;
   1.697 +    dst_argb[0] = Clamp(sb);
   1.698 +    dst_argb[1] = Clamp(sg);
   1.699 +    dst_argb[2] = Clamp(sr);
   1.700 +    dst_argb[3] = Clamp(sa);
   1.701 +    src_argb += 4;
   1.702 +    dst_argb += 4;
   1.703 +  }
   1.704 +}
   1.705 +
   1.706 +// Apply color table to a row of image.
   1.707 +void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
   1.708 +  int x;
   1.709 +  for (x = 0; x < width; ++x) {
   1.710 +    int b = dst_argb[0];
   1.711 +    int g = dst_argb[1];
   1.712 +    int r = dst_argb[2];
   1.713 +    int a = dst_argb[3];
   1.714 +    dst_argb[0] = table_argb[b * 4 + 0];
   1.715 +    dst_argb[1] = table_argb[g * 4 + 1];
   1.716 +    dst_argb[2] = table_argb[r * 4 + 2];
   1.717 +    dst_argb[3] = table_argb[a * 4 + 3];
   1.718 +    dst_argb += 4;
   1.719 +  }
   1.720 +}
   1.721 +
   1.722 +// Apply color table to a row of image.
   1.723 +void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
   1.724 +  int x;
   1.725 +  for (x = 0; x < width; ++x) {
   1.726 +    int b = dst_argb[0];
   1.727 +    int g = dst_argb[1];
   1.728 +    int r = dst_argb[2];
   1.729 +    dst_argb[0] = table_argb[b * 4 + 0];
   1.730 +    dst_argb[1] = table_argb[g * 4 + 1];
   1.731 +    dst_argb[2] = table_argb[r * 4 + 2];
   1.732 +    dst_argb += 4;
   1.733 +  }
   1.734 +}
   1.735 +
   1.736 +void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
   1.737 +                       int interval_offset, int width) {
   1.738 +  int x;
   1.739 +  for (x = 0; x < width; ++x) {
   1.740 +    int b = dst_argb[0];
   1.741 +    int g = dst_argb[1];
   1.742 +    int r = dst_argb[2];
   1.743 +    dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
   1.744 +    dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset;
   1.745 +    dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset;
   1.746 +    dst_argb += 4;
   1.747 +  }
   1.748 +}
   1.749 +
   1.750 +#define REPEAT8(v) (v) | ((v) << 8)
   1.751 +#define SHADE(f, v) v * f >> 24
   1.752 +
   1.753 +void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
   1.754 +                    uint32 value) {
   1.755 +  const uint32 b_scale = REPEAT8(value & 0xff);
   1.756 +  const uint32 g_scale = REPEAT8((value >> 8) & 0xff);
   1.757 +  const uint32 r_scale = REPEAT8((value >> 16) & 0xff);
   1.758 +  const uint32 a_scale = REPEAT8(value >> 24);
   1.759 +
   1.760 +  int i;
   1.761 +  for (i = 0; i < width; ++i) {
   1.762 +    const uint32 b = REPEAT8(src_argb[0]);
   1.763 +    const uint32 g = REPEAT8(src_argb[1]);
   1.764 +    const uint32 r = REPEAT8(src_argb[2]);
   1.765 +    const uint32 a = REPEAT8(src_argb[3]);
   1.766 +    dst_argb[0] = SHADE(b, b_scale);
   1.767 +    dst_argb[1] = SHADE(g, g_scale);
   1.768 +    dst_argb[2] = SHADE(r, r_scale);
   1.769 +    dst_argb[3] = SHADE(a, a_scale);
   1.770 +    src_argb += 4;
   1.771 +    dst_argb += 4;
   1.772 +  }
   1.773 +}
   1.774 +#undef REPEAT8
   1.775 +#undef SHADE
   1.776 +
   1.777 +#define REPEAT8(v) (v) | ((v) << 8)
   1.778 +#define SHADE(f, v) v * f >> 16
   1.779 +
   1.780 +void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1,
   1.781 +                       uint8* dst_argb, int width) {
   1.782 +  int i;
   1.783 +  for (i = 0; i < width; ++i) {
   1.784 +    const uint32 b = REPEAT8(src_argb0[0]);
   1.785 +    const uint32 g = REPEAT8(src_argb0[1]);
   1.786 +    const uint32 r = REPEAT8(src_argb0[2]);
   1.787 +    const uint32 a = REPEAT8(src_argb0[3]);
   1.788 +    const uint32 b_scale = src_argb1[0];
   1.789 +    const uint32 g_scale = src_argb1[1];
   1.790 +    const uint32 r_scale = src_argb1[2];
   1.791 +    const uint32 a_scale = src_argb1[3];
   1.792 +    dst_argb[0] = SHADE(b, b_scale);
   1.793 +    dst_argb[1] = SHADE(g, g_scale);
   1.794 +    dst_argb[2] = SHADE(r, r_scale);
   1.795 +    dst_argb[3] = SHADE(a, a_scale);
   1.796 +    src_argb0 += 4;
   1.797 +    src_argb1 += 4;
   1.798 +    dst_argb += 4;
   1.799 +  }
   1.800 +}
   1.801 +#undef REPEAT8
   1.802 +#undef SHADE
   1.803 +
   1.804 +#define SHADE(f, v) clamp255(v + f)
   1.805 +
   1.806 +void ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1,
   1.807 +                  uint8* dst_argb, int width) {
   1.808 +  int i;
   1.809 +  for (i = 0; i < width; ++i) {
   1.810 +    const int b = src_argb0[0];
   1.811 +    const int g = src_argb0[1];
   1.812 +    const int r = src_argb0[2];
   1.813 +    const int a = src_argb0[3];
   1.814 +    const int b_add = src_argb1[0];
   1.815 +    const int g_add = src_argb1[1];
   1.816 +    const int r_add = src_argb1[2];
   1.817 +    const int a_add = src_argb1[3];
   1.818 +    dst_argb[0] = SHADE(b, b_add);
   1.819 +    dst_argb[1] = SHADE(g, g_add);
   1.820 +    dst_argb[2] = SHADE(r, r_add);
   1.821 +    dst_argb[3] = SHADE(a, a_add);
   1.822 +    src_argb0 += 4;
   1.823 +    src_argb1 += 4;
   1.824 +    dst_argb += 4;
   1.825 +  }
   1.826 +}
   1.827 +#undef SHADE
   1.828 +
   1.829 +#define SHADE(f, v) clamp0(f - v)
   1.830 +
   1.831 +void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1,
   1.832 +                       uint8* dst_argb, int width) {
   1.833 +  int i;
   1.834 +  for (i = 0; i < width; ++i) {
   1.835 +    const int b = src_argb0[0];
   1.836 +    const int g = src_argb0[1];
   1.837 +    const int r = src_argb0[2];
   1.838 +    const int a = src_argb0[3];
   1.839 +    const int b_sub = src_argb1[0];
   1.840 +    const int g_sub = src_argb1[1];
   1.841 +    const int r_sub = src_argb1[2];
   1.842 +    const int a_sub = src_argb1[3];
   1.843 +    dst_argb[0] = SHADE(b, b_sub);
   1.844 +    dst_argb[1] = SHADE(g, g_sub);
   1.845 +    dst_argb[2] = SHADE(r, r_sub);
   1.846 +    dst_argb[3] = SHADE(a, a_sub);
   1.847 +    src_argb0 += 4;
   1.848 +    src_argb1 += 4;
   1.849 +    dst_argb += 4;
   1.850 +  }
   1.851 +}
   1.852 +#undef SHADE
   1.853 +
   1.854 +// Sobel functions which mimics SSSE3.
   1.855 +void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
   1.856 +                 uint8* dst_sobelx, int width) {
   1.857 +  int i;
   1.858 +  for (i = 0; i < width; ++i) {
   1.859 +    int a = src_y0[i];
   1.860 +    int b = src_y1[i];
   1.861 +    int c = src_y2[i];
   1.862 +    int a_sub = src_y0[i + 2];
   1.863 +    int b_sub = src_y1[i + 2];
   1.864 +    int c_sub = src_y2[i + 2];
   1.865 +    int a_diff = a - a_sub;
   1.866 +    int b_diff = b - b_sub;
   1.867 +    int c_diff = c - c_sub;
   1.868 +    int sobel = Abs(a_diff + b_diff * 2 + c_diff);
   1.869 +    dst_sobelx[i] = (uint8)(clamp255(sobel));
   1.870 +  }
   1.871 +}
   1.872 +
   1.873 +void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
   1.874 +                 uint8* dst_sobely, int width) {
   1.875 +  int i;
   1.876 +  for (i = 0; i < width; ++i) {
   1.877 +    int a = src_y0[i + 0];
   1.878 +    int b = src_y0[i + 1];
   1.879 +    int c = src_y0[i + 2];
   1.880 +    int a_sub = src_y1[i + 0];
   1.881 +    int b_sub = src_y1[i + 1];
   1.882 +    int c_sub = src_y1[i + 2];
   1.883 +    int a_diff = a - a_sub;
   1.884 +    int b_diff = b - b_sub;
   1.885 +    int c_diff = c - c_sub;
   1.886 +    int sobel = Abs(a_diff + b_diff * 2 + c_diff);
   1.887 +    dst_sobely[i] = (uint8)(clamp255(sobel));
   1.888 +  }
   1.889 +}
   1.890 +
   1.891 +void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
   1.892 +                uint8* dst_argb, int width) {
   1.893 +  int i;
   1.894 +  for (i = 0; i < width; ++i) {
   1.895 +    int r = src_sobelx[i];
   1.896 +    int b = src_sobely[i];
   1.897 +    int s = clamp255(r + b);
   1.898 +    dst_argb[0] = (uint8)(s);
   1.899 +    dst_argb[1] = (uint8)(s);
   1.900 +    dst_argb[2] = (uint8)(s);
   1.901 +    dst_argb[3] = (uint8)(255u);
   1.902 +    dst_argb += 4;
   1.903 +  }
   1.904 +}
   1.905 +
   1.906 +void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
   1.907 +                       uint8* dst_y, int width) {
   1.908 +  int i;
   1.909 +  for (i = 0; i < width; ++i) {
   1.910 +    int r = src_sobelx[i];
   1.911 +    int b = src_sobely[i];
   1.912 +    int s = clamp255(r + b);
   1.913 +    dst_y[i] = (uint8)(s);
   1.914 +  }
   1.915 +}
   1.916 +
   1.917 +void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
   1.918 +                  uint8* dst_argb, int width) {
   1.919 +  int i;
   1.920 +  for (i = 0; i < width; ++i) {
   1.921 +    int r = src_sobelx[i];
   1.922 +    int b = src_sobely[i];
   1.923 +    int g = clamp255(r + b);
   1.924 +    dst_argb[0] = (uint8)(b);
   1.925 +    dst_argb[1] = (uint8)(g);
   1.926 +    dst_argb[2] = (uint8)(r);
   1.927 +    dst_argb[3] = (uint8)(255u);
   1.928 +    dst_argb += 4;
   1.929 +  }
   1.930 +}
   1.931 +
   1.932 +void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
   1.933 +  // Copy a Y to RGB.
   1.934 +  int x;
   1.935 +  for (x = 0; x < width; ++x) {
   1.936 +    uint8 y = src_y[0];
   1.937 +    dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
   1.938 +    dst_argb[3] = 255u;
   1.939 +    dst_argb += 4;
   1.940 +    ++src_y;
   1.941 +  }
   1.942 +}
   1.943 +
   1.944 +// C reference code that mimics the YUV assembly.
   1.945 +
   1.946 +#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
   1.947 +
   1.948 +#define UB 127 /* min(63,(int8)(2.018 * 64)) */
   1.949 +#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
   1.950 +#define UR 0
   1.951 +
   1.952 +#define VB 0
   1.953 +#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
   1.954 +#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
   1.955 +
   1.956 +// Bias
   1.957 +#define BB UB * 128 + VB * 128
   1.958 +#define BG UG * 128 + VG * 128
   1.959 +#define BR UR * 128 + VR * 128
   1.960 +
   1.961 +static __inline void YuvPixel(uint8 y, uint8 u, uint8 v,
   1.962 +                              uint8* b, uint8* g, uint8* r) {
   1.963 +  int32 y1 = ((int32)(y) - 16) * YG;
   1.964 +  *b = Clamp((int32)((u * UB + v * VB) - (BB) + y1) >> 6);
   1.965 +  *g = Clamp((int32)((u * UG + v * VG) - (BG) + y1) >> 6);
   1.966 +  *r = Clamp((int32)((u * UR + v * VR) - (BR) + y1) >> 6);
   1.967 +}
   1.968 +
   1.969 +#if !defined(LIBYUV_DISABLE_NEON) && \
   1.970 +    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
   1.971 +// C mimic assembly.
   1.972 +// TODO(fbarchard): Remove subsampling from Neon.
   1.973 +void I444ToARGBRow_C(const uint8* src_y,
   1.974 +                     const uint8* src_u,
   1.975 +                     const uint8* src_v,
   1.976 +                     uint8* rgb_buf,
   1.977 +                     int width) {
   1.978 +  int x;
   1.979 +  for (x = 0; x < width - 1; x += 2) {
   1.980 +    uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
   1.981 +    uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
   1.982 +    YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
   1.983 +    rgb_buf[3] = 255;
   1.984 +    YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
   1.985 +    rgb_buf[7] = 255;
   1.986 +    src_y += 2;
   1.987 +    src_u += 2;
   1.988 +    src_v += 2;
   1.989 +    rgb_buf += 8;  // Advance 2 pixels.
   1.990 +  }
   1.991 +  if (width & 1) {
   1.992 +    YuvPixel(src_y[0], src_u[0], src_v[0],
   1.993 +             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
   1.994 +  }
   1.995 +}
   1.996 +#else
   1.997 +void I444ToARGBRow_C(const uint8* src_y,
   1.998 +                     const uint8* src_u,
   1.999 +                     const uint8* src_v,
  1.1000 +                     uint8* rgb_buf,
  1.1001 +                     int width) {
  1.1002 +  int x;
  1.1003 +  for (x = 0; x < width; ++x) {
  1.1004 +    YuvPixel(src_y[0], src_u[0], src_v[0],
  1.1005 +             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
  1.1006 +    rgb_buf[3] = 255;
  1.1007 +    src_y += 1;
  1.1008 +    src_u += 1;
  1.1009 +    src_v += 1;
  1.1010 +    rgb_buf += 4;  // Advance 1 pixel.
  1.1011 +  }
  1.1012 +}
  1.1013 +#endif
  1.1014 +// Also used for 420
  1.1015 +void I422ToARGBRow_C(const uint8* src_y,
  1.1016 +                     const uint8* src_u,
  1.1017 +                     const uint8* src_v,
  1.1018 +                     uint8* rgb_buf,
  1.1019 +                     int width) {
  1.1020 +  int x;
  1.1021 +  for (x = 0; x < width - 1; x += 2) {
  1.1022 +    YuvPixel(src_y[0], src_u[0], src_v[0],
  1.1023 +             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
  1.1024 +    rgb_buf[3] = 255;
  1.1025 +    YuvPixel(src_y[1], src_u[0], src_v[0],
  1.1026 +             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
  1.1027 +    rgb_buf[7] = 255;
  1.1028 +    src_y += 2;
  1.1029 +    src_u += 1;
  1.1030 +    src_v += 1;
  1.1031 +    rgb_buf += 8;  // Advance 2 pixels.
  1.1032 +  }
  1.1033 +  if (width & 1) {
  1.1034 +    YuvPixel(src_y[0], src_u[0], src_v[0],
  1.1035 +             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
  1.1036 +    rgb_buf[3] = 255;
  1.1037 +  }
  1.1038 +}
  1.1039 +
  1.1040 +void I422ToRGB24Row_C(const uint8* src_y,
  1.1041 +                      const uint8* src_u,
  1.1042 +                      const uint8* src_v,
  1.1043 +                      uint8* rgb_buf,
  1.1044 +                      int width) {
  1.1045 +  int x;
  1.1046 +  for (x = 0; x < width - 1; x += 2) {
  1.1047 +    YuvPixel(src_y[0], src_u[0], src_v[0],
  1.1048 +             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
  1.1049 +    YuvPixel(src_y[1], src_u[0], src_v[0],
  1.1050 +             rgb_buf + 3, rgb_buf + 4, rgb_buf + 5);
  1.1051 +    src_y += 2;
  1.1052 +    src_u += 1;
  1.1053 +    src_v += 1;
  1.1054 +    rgb_buf += 6;  // Advance 2 pixels.
  1.1055 +  }
  1.1056 +  if (width & 1) {
  1.1057 +    YuvPixel(src_y[0], src_u[0], src_v[0],
  1.1058 +             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
  1.1059 +  }
  1.1060 +}
  1.1061 +
  1.1062 +void I422ToRAWRow_C(const uint8* src_y,
  1.1063 +                    const uint8* src_u,
  1.1064 +                    const uint8* src_v,
  1.1065 +                    uint8* rgb_buf,
  1.1066 +                    int width) {
  1.1067 +  int x;
  1.1068 +  for (x = 0; x < width - 1; x += 2) {
  1.1069 +    YuvPixel(src_y[0], src_u[0], src_v[0],
  1.1070 +             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
  1.1071 +    YuvPixel(src_y[1], src_u[0], src_v[0],
  1.1072 +             rgb_buf + 5, rgb_buf + 4, rgb_buf + 3);
  1.1073 +    src_y += 2;
  1.1074 +    src_u += 1;
  1.1075 +    src_v += 1;
  1.1076 +    rgb_buf += 6;  // Advance 2 pixels.
  1.1077 +  }
  1.1078 +  if (width & 1) {
  1.1079 +    YuvPixel(src_y[0], src_u[0], src_v[0],
  1.1080 +             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
  1.1081 +  }
  1.1082 +}
  1.1083 +
  1.1084 +void I422ToARGB4444Row_C(const uint8* src_y,
  1.1085 +                         const uint8* src_u,
  1.1086 +                         const uint8* src_v,
  1.1087 +                         uint8* dst_argb4444,
  1.1088 +                         int width) {
  1.1089 +  uint8 b0;
  1.1090 +  uint8 g0;
  1.1091 +  uint8 r0;
  1.1092 +  uint8 b1;
  1.1093 +  uint8 g1;
  1.1094 +  uint8 r1;
  1.1095 +  int x;
  1.1096 +  for (x = 0; x < width - 1; x += 2) {
  1.1097 +    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
  1.1098 +    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
  1.1099 +    b0 = b0 >> 4;
  1.1100 +    g0 = g0 >> 4;
  1.1101 +    r0 = r0 >> 4;
  1.1102 +    b1 = b1 >> 4;
  1.1103 +    g1 = g1 >> 4;
  1.1104 +    r1 = r1 >> 4;
  1.1105 +    *(uint32*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
  1.1106 +        (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000;
  1.1107 +    src_y += 2;
  1.1108 +    src_u += 1;
  1.1109 +    src_v += 1;
  1.1110 +    dst_argb4444 += 4;  // Advance 2 pixels.
  1.1111 +  }
  1.1112 +  if (width & 1) {
  1.1113 +    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
  1.1114 +    b0 = b0 >> 4;
  1.1115 +    g0 = g0 >> 4;
  1.1116 +    r0 = r0 >> 4;
  1.1117 +    *(uint16*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
  1.1118 +        0xf000;
  1.1119 +  }
  1.1120 +}
  1.1121 +
  1.1122 +void I422ToARGB1555Row_C(const uint8* src_y,
  1.1123 +                         const uint8* src_u,
  1.1124 +                         const uint8* src_v,
  1.1125 +                         uint8* dst_argb1555,
  1.1126 +                         int width) {
  1.1127 +  uint8 b0;
  1.1128 +  uint8 g0;
  1.1129 +  uint8 r0;
  1.1130 +  uint8 b1;
  1.1131 +  uint8 g1;
  1.1132 +  uint8 r1;
  1.1133 +  int x;
  1.1134 +  for (x = 0; x < width - 1; x += 2) {
  1.1135 +    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
  1.1136 +    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
  1.1137 +    b0 = b0 >> 3;
  1.1138 +    g0 = g0 >> 3;
  1.1139 +    r0 = r0 >> 3;
  1.1140 +    b1 = b1 >> 3;
  1.1141 +    g1 = g1 >> 3;
  1.1142 +    r1 = r1 >> 3;
  1.1143 +    *(uint32*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
  1.1144 +        (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000;
  1.1145 +    src_y += 2;
  1.1146 +    src_u += 1;
  1.1147 +    src_v += 1;
  1.1148 +    dst_argb1555 += 4;  // Advance 2 pixels.
  1.1149 +  }
  1.1150 +  if (width & 1) {
  1.1151 +    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
  1.1152 +    b0 = b0 >> 3;
  1.1153 +    g0 = g0 >> 3;
  1.1154 +    r0 = r0 >> 3;
  1.1155 +    *(uint16*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
  1.1156 +        0x8000;
  1.1157 +  }
  1.1158 +}
  1.1159 +
  1.1160 +void I422ToRGB565Row_C(const uint8* src_y,
  1.1161 +                       const uint8* src_u,
  1.1162 +                       const uint8* src_v,
  1.1163 +                       uint8* dst_rgb565,
  1.1164 +                       int width) {
  1.1165 +  uint8 b0;
  1.1166 +  uint8 g0;
  1.1167 +  uint8 r0;
  1.1168 +  uint8 b1;
  1.1169 +  uint8 g1;
  1.1170 +  uint8 r1;
  1.1171 +  int x;
  1.1172 +  for (x = 0; x < width - 1; x += 2) {
  1.1173 +    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
  1.1174 +    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
  1.1175 +    b0 = b0 >> 3;
  1.1176 +    g0 = g0 >> 2;
  1.1177 +    r0 = r0 >> 3;
  1.1178 +    b1 = b1 >> 3;
  1.1179 +    g1 = g1 >> 2;
  1.1180 +    r1 = r1 >> 3;
  1.1181 +    *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
  1.1182 +        (b1 << 16) | (g1 << 21) | (r1 << 27);
  1.1183 +    src_y += 2;
  1.1184 +    src_u += 1;
  1.1185 +    src_v += 1;
  1.1186 +    dst_rgb565 += 4;  // Advance 2 pixels.
  1.1187 +  }
  1.1188 +  if (width & 1) {
  1.1189 +    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
  1.1190 +    b0 = b0 >> 3;
  1.1191 +    g0 = g0 >> 2;
  1.1192 +    r0 = r0 >> 3;
  1.1193 +    *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
  1.1194 +  }
  1.1195 +}
  1.1196 +
  1.1197 +void I411ToARGBRow_C(const uint8* src_y,
  1.1198 +                     const uint8* src_u,
  1.1199 +                     const uint8* src_v,
  1.1200 +                     uint8* rgb_buf,
  1.1201 +                     int width) {
  1.1202 +  int x;
  1.1203 +  for (x = 0; x < width - 3; x += 4) {
  1.1204 +    YuvPixel(src_y[0], src_u[0], src_v[0],
  1.1205 +             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
  1.1206 +    rgb_buf[3] = 255;
  1.1207 +    YuvPixel(src_y[1], src_u[0], src_v[0],
  1.1208 +             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
  1.1209 +    rgb_buf[7] = 255;
  1.1210 +    YuvPixel(src_y[2], src_u[0], src_v[0],
  1.1211 +             rgb_buf + 8, rgb_buf + 9, rgb_buf + 10);
  1.1212 +    rgb_buf[11] = 255;
  1.1213 +    YuvPixel(src_y[3], src_u[0], src_v[0],
  1.1214 +             rgb_buf + 12, rgb_buf + 13, rgb_buf + 14);
  1.1215 +    rgb_buf[15] = 255;
  1.1216 +    src_y += 4;
  1.1217 +    src_u += 1;
  1.1218 +    src_v += 1;
  1.1219 +    rgb_buf += 16;  // Advance 4 pixels.
  1.1220 +  }
  1.1221 +  if (width & 2) {
  1.1222 +    YuvPixel(src_y[0], src_u[0], src_v[0],
  1.1223 +             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
  1.1224 +    rgb_buf[3] = 255;
  1.1225 +    YuvPixel(src_y[1], src_u[0], src_v[0],
  1.1226 +             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
  1.1227 +    rgb_buf[7] = 255;
  1.1228 +    src_y += 2;
  1.1229 +    rgb_buf += 8;  // Advance 2 pixels.
  1.1230 +  }
  1.1231 +  if (width & 1) {
  1.1232 +    YuvPixel(src_y[0], src_u[0], src_v[0],
  1.1233 +             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
  1.1234 +    rgb_buf[3] = 255;
  1.1235 +  }
  1.1236 +}
  1.1237 +
  1.1238 +void NV12ToARGBRow_C(const uint8* src_y,
  1.1239 +                     const uint8* usrc_v,
  1.1240 +                     uint8* rgb_buf,
  1.1241 +                     int width) {
  1.1242 +  int x;
  1.1243 +  for (x = 0; x < width - 1; x += 2) {
  1.1244 +    YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
  1.1245 +             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
  1.1246 +    rgb_buf[3] = 255;
  1.1247 +    YuvPixel(src_y[1], usrc_v[0], usrc_v[1],
  1.1248 +             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
  1.1249 +    rgb_buf[7] = 255;
  1.1250 +    src_y += 2;
  1.1251 +    usrc_v += 2;
  1.1252 +    rgb_buf += 8;  // Advance 2 pixels.
  1.1253 +  }
  1.1254 +  if (width & 1) {
  1.1255 +    YuvPixel(src_y[0], usrc_v[0], usrc_v[1],
  1.1256 +             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
  1.1257 +    rgb_buf[3] = 255;
  1.1258 +  }
  1.1259 +}
  1.1260 +
  1.1261 +void NV21ToARGBRow_C(const uint8* src_y,
  1.1262 +                     const uint8* src_vu,
  1.1263 +                     uint8* rgb_buf,
  1.1264 +                     int width) {
  1.1265 +  int x;
  1.1266 +  for (x = 0; x < width - 1; x += 2) {
  1.1267 +    YuvPixel(src_y[0], src_vu[1], src_vu[0],
  1.1268 +             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
  1.1269 +    rgb_buf[3] = 255;
  1.1270 +
  1.1271 +    YuvPixel(src_y[1], src_vu[1], src_vu[0],
  1.1272 +             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
  1.1273 +    rgb_buf[7] = 255;
  1.1274 +
  1.1275 +    src_y += 2;
  1.1276 +    src_vu += 2;
  1.1277 +    rgb_buf += 8;  // Advance 2 pixels.
  1.1278 +  }
  1.1279 +  if (width & 1) {
  1.1280 +    YuvPixel(src_y[0], src_vu[1], src_vu[0],
  1.1281 +             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
  1.1282 +    rgb_buf[3] = 255;
  1.1283 +  }
  1.1284 +}
  1.1285 +
  1.1286 +void NV12ToRGB565Row_C(const uint8* src_y,
  1.1287 +                       const uint8* usrc_v,
  1.1288 +                       uint8* dst_rgb565,
  1.1289 +                       int width) {
  1.1290 +  uint8 b0;
  1.1291 +  uint8 g0;
  1.1292 +  uint8 r0;
  1.1293 +  uint8 b1;
  1.1294 +  uint8 g1;
  1.1295 +  uint8 r1;
  1.1296 +  int x;
  1.1297 +  for (x = 0; x < width - 1; x += 2) {
  1.1298 +    YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
  1.1299 +    YuvPixel(src_y[1], usrc_v[0], usrc_v[1], &b1, &g1, &r1);
  1.1300 +    b0 = b0 >> 3;
  1.1301 +    g0 = g0 >> 2;
  1.1302 +    r0 = r0 >> 3;
  1.1303 +    b1 = b1 >> 3;
  1.1304 +    g1 = g1 >> 2;
  1.1305 +    r1 = r1 >> 3;
  1.1306 +    *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
  1.1307 +        (b1 << 16) | (g1 << 21) | (r1 << 27);
  1.1308 +    src_y += 2;
  1.1309 +    usrc_v += 2;
  1.1310 +    dst_rgb565 += 4;  // Advance 2 pixels.
  1.1311 +  }
  1.1312 +  if (width & 1) {
  1.1313 +    YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0);
  1.1314 +    b0 = b0 >> 3;
  1.1315 +    g0 = g0 >> 2;
  1.1316 +    r0 = r0 >> 3;
  1.1317 +    *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
  1.1318 +  }
  1.1319 +}
  1.1320 +
  1.1321 +void NV21ToRGB565Row_C(const uint8* src_y,
  1.1322 +                       const uint8* vsrc_u,
  1.1323 +                       uint8* dst_rgb565,
  1.1324 +                       int width) {
  1.1325 +  uint8 b0;
  1.1326 +  uint8 g0;
  1.1327 +  uint8 r0;
  1.1328 +  uint8 b1;
  1.1329 +  uint8 g1;
  1.1330 +  uint8 r1;
  1.1331 +  int x;
  1.1332 +  for (x = 0; x < width - 1; x += 2) {
  1.1333 +    YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
  1.1334 +    YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1);
  1.1335 +    b0 = b0 >> 3;
  1.1336 +    g0 = g0 >> 2;
  1.1337 +    r0 = r0 >> 3;
  1.1338 +    b1 = b1 >> 3;
  1.1339 +    g1 = g1 >> 2;
  1.1340 +    r1 = r1 >> 3;
  1.1341 +    *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
  1.1342 +        (b1 << 16) | (g1 << 21) | (r1 << 27);
  1.1343 +    src_y += 2;
  1.1344 +    vsrc_u += 2;
  1.1345 +    dst_rgb565 += 4;  // Advance 2 pixels.
  1.1346 +  }
  1.1347 +  if (width & 1) {
  1.1348 +    YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
  1.1349 +    b0 = b0 >> 3;
  1.1350 +    g0 = g0 >> 2;
  1.1351 +    r0 = r0 >> 3;
  1.1352 +    *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
  1.1353 +  }
  1.1354 +}
  1.1355 +
  1.1356 +void YUY2ToARGBRow_C(const uint8* src_yuy2,
  1.1357 +                     uint8* rgb_buf,
  1.1358 +                     int width) {
  1.1359 +  int x;
  1.1360 +  for (x = 0; x < width - 1; x += 2) {
  1.1361 +    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
  1.1362 +             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
  1.1363 +    rgb_buf[3] = 255;
  1.1364 +    YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3],
  1.1365 +             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
  1.1366 +    rgb_buf[7] = 255;
  1.1367 +    src_yuy2 += 4;
  1.1368 +    rgb_buf += 8;  // Advance 2 pixels.
  1.1369 +  }
  1.1370 +  if (width & 1) {
  1.1371 +    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
  1.1372 +             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
  1.1373 +    rgb_buf[3] = 255;
  1.1374 +  }
  1.1375 +}
  1.1376 +
  1.1377 +void UYVYToARGBRow_C(const uint8* src_uyvy,
  1.1378 +                     uint8* rgb_buf,
  1.1379 +                     int width) {
  1.1380 +  int x;
  1.1381 +  for (x = 0; x < width - 1; x += 2) {
  1.1382 +    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
  1.1383 +             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
  1.1384 +    rgb_buf[3] = 255;
  1.1385 +    YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2],
  1.1386 +             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
  1.1387 +    rgb_buf[7] = 255;
  1.1388 +    src_uyvy += 4;
  1.1389 +    rgb_buf += 8;  // Advance 2 pixels.
  1.1390 +  }
  1.1391 +  if (width & 1) {
  1.1392 +    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
  1.1393 +             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
  1.1394 +    rgb_buf[3] = 255;
  1.1395 +  }
  1.1396 +}
  1.1397 +
  1.1398 +void I422ToBGRARow_C(const uint8* src_y,
  1.1399 +                     const uint8* src_u,
  1.1400 +                     const uint8* src_v,
  1.1401 +                     uint8* rgb_buf,
  1.1402 +                     int width) {
  1.1403 +  int x;
  1.1404 +  for (x = 0; x < width - 1; x += 2) {
  1.1405 +    YuvPixel(src_y[0], src_u[0], src_v[0],
  1.1406 +             rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
  1.1407 +    rgb_buf[0] = 255;
  1.1408 +    YuvPixel(src_y[1], src_u[0], src_v[0],
  1.1409 +             rgb_buf + 7, rgb_buf + 6, rgb_buf + 5);
  1.1410 +    rgb_buf[4] = 255;
  1.1411 +    src_y += 2;
  1.1412 +    src_u += 1;
  1.1413 +    src_v += 1;
  1.1414 +    rgb_buf += 8;  // Advance 2 pixels.
  1.1415 +  }
  1.1416 +  if (width & 1) {
  1.1417 +    YuvPixel(src_y[0], src_u[0], src_v[0],
  1.1418 +             rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
  1.1419 +    rgb_buf[0] = 255;
  1.1420 +  }
  1.1421 +}
  1.1422 +
  1.1423 +void I422ToABGRRow_C(const uint8* src_y,
  1.1424 +                     const uint8* src_u,
  1.1425 +                     const uint8* src_v,
  1.1426 +                     uint8* rgb_buf,
  1.1427 +                     int width) {
  1.1428 +  int x;
  1.1429 +  for (x = 0; x < width - 1; x += 2) {
  1.1430 +    YuvPixel(src_y[0], src_u[0], src_v[0],
  1.1431 +             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
  1.1432 +    rgb_buf[3] = 255;
  1.1433 +    YuvPixel(src_y[1], src_u[0], src_v[0],
  1.1434 +             rgb_buf + 6, rgb_buf + 5, rgb_buf + 4);
  1.1435 +    rgb_buf[7] = 255;
  1.1436 +    src_y += 2;
  1.1437 +    src_u += 1;
  1.1438 +    src_v += 1;
  1.1439 +    rgb_buf += 8;  // Advance 2 pixels.
  1.1440 +  }
  1.1441 +  if (width & 1) {
  1.1442 +    YuvPixel(src_y[0], src_u[0], src_v[0],
  1.1443 +             rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
  1.1444 +    rgb_buf[3] = 255;
  1.1445 +  }
  1.1446 +}
  1.1447 +
  1.1448 +void I422ToRGBARow_C(const uint8* src_y,
  1.1449 +                     const uint8* src_u,
  1.1450 +                     const uint8* src_v,
  1.1451 +                     uint8* rgb_buf,
  1.1452 +                     int width) {
  1.1453 +  int x;
  1.1454 +  for (x = 0; x < width - 1; x += 2) {
  1.1455 +    YuvPixel(src_y[0], src_u[0], src_v[0],
  1.1456 +             rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
  1.1457 +    rgb_buf[0] = 255;
  1.1458 +    YuvPixel(src_y[1], src_u[0], src_v[0],
  1.1459 +             rgb_buf + 5, rgb_buf + 6, rgb_buf + 7);
  1.1460 +    rgb_buf[4] = 255;
  1.1461 +    src_y += 2;
  1.1462 +    src_u += 1;
  1.1463 +    src_v += 1;
  1.1464 +    rgb_buf += 8;  // Advance 2 pixels.
  1.1465 +  }
  1.1466 +  if (width & 1) {
  1.1467 +    YuvPixel(src_y[0], src_u[0], src_v[0],
  1.1468 +             rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
  1.1469 +    rgb_buf[0] = 255;
  1.1470 +  }
  1.1471 +}
  1.1472 +
  1.1473 +void YToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
  1.1474 +  int x;
  1.1475 +  for (x = 0; x < width - 1; x += 2) {
  1.1476 +    YuvPixel(src_y[0], 128, 128,
  1.1477 +             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
  1.1478 +    rgb_buf[3] = 255;
  1.1479 +    YuvPixel(src_y[1], 128, 128,
  1.1480 +             rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
  1.1481 +    rgb_buf[7] = 255;
  1.1482 +    src_y += 2;
  1.1483 +    rgb_buf += 8;  // Advance 2 pixels.
  1.1484 +  }
  1.1485 +  if (width & 1) {
  1.1486 +    YuvPixel(src_y[0], 128, 128,
  1.1487 +             rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
  1.1488 +    rgb_buf[3] = 255;
  1.1489 +  }
  1.1490 +}
  1.1491 +
  1.1492 +void MirrorRow_C(const uint8* src, uint8* dst, int width) {
  1.1493 +  int x;
  1.1494 +  src += width - 1;
  1.1495 +  for (x = 0; x < width - 1; x += 2) {
  1.1496 +    dst[x] = src[0];
  1.1497 +    dst[x + 1] = src[-1];
  1.1498 +    src -= 2;
  1.1499 +  }
  1.1500 +  if (width & 1) {
  1.1501 +    dst[width - 1] = src[0];
  1.1502 +  }
  1.1503 +}
  1.1504 +
  1.1505 +void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
  1.1506 +  int x;
  1.1507 +  src_uv += (width - 1) << 1;
  1.1508 +  for (x = 0; x < width - 1; x += 2) {
  1.1509 +    dst_u[x] = src_uv[0];
  1.1510 +    dst_u[x + 1] = src_uv[-2];
  1.1511 +    dst_v[x] = src_uv[1];
  1.1512 +    dst_v[x + 1] = src_uv[-2 + 1];
  1.1513 +    src_uv -= 4;
  1.1514 +  }
  1.1515 +  if (width & 1) {
  1.1516 +    dst_u[width - 1] = src_uv[0];
  1.1517 +    dst_v[width - 1] = src_uv[1];
  1.1518 +  }
  1.1519 +}
  1.1520 +
  1.1521 +void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
  1.1522 +  int x;
  1.1523 +  const uint32* src32 = (const uint32*)(src);
  1.1524 +  uint32* dst32 = (uint32*)(dst);
  1.1525 +  src32 += width - 1;
  1.1526 +  for (x = 0; x < width - 1; x += 2) {
  1.1527 +    dst32[x] = src32[0];
  1.1528 +    dst32[x + 1] = src32[-1];
  1.1529 +    src32 -= 2;
  1.1530 +  }
  1.1531 +  if (width & 1) {
  1.1532 +    dst32[width - 1] = src32[0];
  1.1533 +  }
  1.1534 +}
  1.1535 +
  1.1536 +void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
  1.1537 +  int x;
  1.1538 +  for (x = 0; x < width - 1; x += 2) {
  1.1539 +    dst_u[x] = src_uv[0];
  1.1540 +    dst_u[x + 1] = src_uv[2];
  1.1541 +    dst_v[x] = src_uv[1];
  1.1542 +    dst_v[x + 1] = src_uv[3];
  1.1543 +    src_uv += 4;
  1.1544 +  }
  1.1545 +  if (width & 1) {
  1.1546 +    dst_u[width - 1] = src_uv[0];
  1.1547 +    dst_v[width - 1] = src_uv[1];
  1.1548 +  }
  1.1549 +}
  1.1550 +
  1.1551 +void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
  1.1552 +                  int width) {
  1.1553 +  int x;
  1.1554 +  for (x = 0; x < width - 1; x += 2) {
  1.1555 +    dst_uv[0] = src_u[x];
  1.1556 +    dst_uv[1] = src_v[x];
  1.1557 +    dst_uv[2] = src_u[x + 1];
  1.1558 +    dst_uv[3] = src_v[x + 1];
  1.1559 +    dst_uv += 4;
  1.1560 +  }
  1.1561 +  if (width & 1) {
  1.1562 +    dst_uv[0] = src_u[width - 1];
  1.1563 +    dst_uv[1] = src_v[width - 1];
  1.1564 +  }
  1.1565 +}
  1.1566 +
   1.1567 +void CopyRow_C(const uint8* src, uint8* dst, int count) {  // Copy 'count' bytes from src to dst.
   1.1568 +  memcpy(dst, src, count);  // memcpy semantics: src and dst must not overlap.
   1.1569 +}
  1.1570 +
  1.1571 +void SetRow_C(uint8* dst, uint32 v8, int count) {
  1.1572 +#ifdef _MSC_VER
  1.1573 +  // VC will generate rep stosb.
  1.1574 +  int x;
  1.1575 +  for (x = 0; x < count; ++x) {
  1.1576 +    dst[x] = v8;
  1.1577 +  }
  1.1578 +#else
  1.1579 +  memset(dst, v8, count);
  1.1580 +#endif
  1.1581 +}
  1.1582 +
  1.1583 +void ARGBSetRows_C(uint8* dst, uint32 v32, int width,
  1.1584 +                 int dst_stride, int height) {
  1.1585 +  int y;
  1.1586 +  for (y = 0; y < height; ++y) {
  1.1587 +    uint32* d = (uint32*)(dst);
  1.1588 +    int x;
  1.1589 +    for (x = 0; x < width; ++x) {
  1.1590 +      d[x] = v32;
  1.1591 +    }
  1.1592 +    dst += dst_stride;
  1.1593 +  }
  1.1594 +}
  1.1595 +
  1.1596 +// Filter 2 rows of YUY2 UV's (422) into U and V (420).
  1.1597 +void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
  1.1598 +                   uint8* dst_u, uint8* dst_v, int width) {
  1.1599 +  // Output a row of UV values, filtering 2 rows of YUY2.
  1.1600 +  int x;
  1.1601 +  for (x = 0; x < width; x += 2) {
  1.1602 +    dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
  1.1603 +    dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
  1.1604 +    src_yuy2 += 4;
  1.1605 +    dst_u += 1;
  1.1606 +    dst_v += 1;
  1.1607 +  }
  1.1608 +}
  1.1609 +
  1.1610 +// Copy row of YUY2 UV's (422) into U and V (422).
  1.1611 +void YUY2ToUV422Row_C(const uint8* src_yuy2,
  1.1612 +                      uint8* dst_u, uint8* dst_v, int width) {
  1.1613 +  // Output a row of UV values.
  1.1614 +  int x;
  1.1615 +  for (x = 0; x < width; x += 2) {
  1.1616 +    dst_u[0] = src_yuy2[1];
  1.1617 +    dst_v[0] = src_yuy2[3];
  1.1618 +    src_yuy2 += 4;
  1.1619 +    dst_u += 1;
  1.1620 +    dst_v += 1;
  1.1621 +  }
  1.1622 +}
  1.1623 +
  1.1624 +// Copy row of YUY2 Y's (422) into Y (420/422).
  1.1625 +void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
  1.1626 +  // Output a row of Y values.
  1.1627 +  int x;
  1.1628 +  for (x = 0; x < width - 1; x += 2) {
  1.1629 +    dst_y[x] = src_yuy2[0];
  1.1630 +    dst_y[x + 1] = src_yuy2[2];
  1.1631 +    src_yuy2 += 4;
  1.1632 +  }
  1.1633 +  if (width & 1) {
  1.1634 +    dst_y[width - 1] = src_yuy2[0];
  1.1635 +  }
  1.1636 +}
  1.1637 +
  1.1638 +// Filter 2 rows of UYVY UV's (422) into U and V (420).
  1.1639 +void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
  1.1640 +                   uint8* dst_u, uint8* dst_v, int width) {
  1.1641 +  // Output a row of UV values.
  1.1642 +  int x;
  1.1643 +  for (x = 0; x < width; x += 2) {
  1.1644 +    dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
  1.1645 +    dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
  1.1646 +    src_uyvy += 4;
  1.1647 +    dst_u += 1;
  1.1648 +    dst_v += 1;
  1.1649 +  }
  1.1650 +}
  1.1651 +
  1.1652 +// Copy row of UYVY UV's (422) into U and V (422).
  1.1653 +void UYVYToUV422Row_C(const uint8* src_uyvy,
  1.1654 +                      uint8* dst_u, uint8* dst_v, int width) {
  1.1655 +  // Output a row of UV values.
  1.1656 +  int x;
  1.1657 +  for (x = 0; x < width; x += 2) {
  1.1658 +    dst_u[0] = src_uyvy[0];
  1.1659 +    dst_v[0] = src_uyvy[2];
  1.1660 +    src_uyvy += 4;
  1.1661 +    dst_u += 1;
  1.1662 +    dst_v += 1;
  1.1663 +  }
  1.1664 +}
  1.1665 +
  1.1666 +// Copy row of UYVY Y's (422) into Y (420/422).
  1.1667 +void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {
  1.1668 +  // Output a row of Y values.
  1.1669 +  int x;
  1.1670 +  for (x = 0; x < width - 1; x += 2) {
  1.1671 +    dst_y[x] = src_uyvy[1];
  1.1672 +    dst_y[x + 1] = src_uyvy[3];
  1.1673 +    src_uyvy += 4;
  1.1674 +  }
  1.1675 +  if (width & 1) {
  1.1676 +    dst_y[width - 1] = src_uyvy[1];
  1.1677 +  }
  1.1678 +}
  1.1679 +
  1.1680 +#define BLEND(f, b, a) (((256 - a) * b) >> 8) + f
  1.1681 +
  1.1682 +// Blend src_argb0 over src_argb1 and store to dst_argb.
  1.1683 +// dst_argb may be src_argb0 or src_argb1.
  1.1684 +// This code mimics the SSSE3 version for better testability.
  1.1685 +void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,
  1.1686 +                    uint8* dst_argb, int width) {
  1.1687 +  int x;
  1.1688 +  for (x = 0; x < width - 1; x += 2) {
  1.1689 +    uint32 fb = src_argb0[0];
  1.1690 +    uint32 fg = src_argb0[1];
  1.1691 +    uint32 fr = src_argb0[2];
  1.1692 +    uint32 a = src_argb0[3];
  1.1693 +    uint32 bb = src_argb1[0];
  1.1694 +    uint32 bg = src_argb1[1];
  1.1695 +    uint32 br = src_argb1[2];
  1.1696 +    dst_argb[0] = BLEND(fb, bb, a);
  1.1697 +    dst_argb[1] = BLEND(fg, bg, a);
  1.1698 +    dst_argb[2] = BLEND(fr, br, a);
  1.1699 +    dst_argb[3] = 255u;
  1.1700 +
  1.1701 +    fb = src_argb0[4 + 0];
  1.1702 +    fg = src_argb0[4 + 1];
  1.1703 +    fr = src_argb0[4 + 2];
  1.1704 +    a = src_argb0[4 + 3];
  1.1705 +    bb = src_argb1[4 + 0];
  1.1706 +    bg = src_argb1[4 + 1];
  1.1707 +    br = src_argb1[4 + 2];
  1.1708 +    dst_argb[4 + 0] = BLEND(fb, bb, a);
  1.1709 +    dst_argb[4 + 1] = BLEND(fg, bg, a);
  1.1710 +    dst_argb[4 + 2] = BLEND(fr, br, a);
  1.1711 +    dst_argb[4 + 3] = 255u;
  1.1712 +    src_argb0 += 8;
  1.1713 +    src_argb1 += 8;
  1.1714 +    dst_argb += 8;
  1.1715 +  }
  1.1716 +
  1.1717 +  if (width & 1) {
  1.1718 +    uint32 fb = src_argb0[0];
  1.1719 +    uint32 fg = src_argb0[1];
  1.1720 +    uint32 fr = src_argb0[2];
  1.1721 +    uint32 a = src_argb0[3];
  1.1722 +    uint32 bb = src_argb1[0];
  1.1723 +    uint32 bg = src_argb1[1];
  1.1724 +    uint32 br = src_argb1[2];
  1.1725 +    dst_argb[0] = BLEND(fb, bb, a);
  1.1726 +    dst_argb[1] = BLEND(fg, bg, a);
  1.1727 +    dst_argb[2] = BLEND(fr, br, a);
  1.1728 +    dst_argb[3] = 255u;
  1.1729 +  }
  1.1730 +}
  1.1731 +#undef BLEND
  1.1732 +#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
  1.1733 +
  1.1734 +// Multiply source RGB by alpha and store to destination.
  1.1735 +// This code mimics the SSSE3 version for better testability.
  1.1736 +void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
  1.1737 +  int i;
  1.1738 +  for (i = 0; i < width - 1; i += 2) {
  1.1739 +    uint32 b = src_argb[0];
  1.1740 +    uint32 g = src_argb[1];
  1.1741 +    uint32 r = src_argb[2];
  1.1742 +    uint32 a = src_argb[3];
  1.1743 +    dst_argb[0] = ATTENUATE(b, a);
  1.1744 +    dst_argb[1] = ATTENUATE(g, a);
  1.1745 +    dst_argb[2] = ATTENUATE(r, a);
  1.1746 +    dst_argb[3] = a;
  1.1747 +    b = src_argb[4];
  1.1748 +    g = src_argb[5];
  1.1749 +    r = src_argb[6];
  1.1750 +    a = src_argb[7];
  1.1751 +    dst_argb[4] = ATTENUATE(b, a);
  1.1752 +    dst_argb[5] = ATTENUATE(g, a);
  1.1753 +    dst_argb[6] = ATTENUATE(r, a);
  1.1754 +    dst_argb[7] = a;
  1.1755 +    src_argb += 8;
  1.1756 +    dst_argb += 8;
  1.1757 +  }
  1.1758 +
  1.1759 +  if (width & 1) {
  1.1760 +    const uint32 b = src_argb[0];
  1.1761 +    const uint32 g = src_argb[1];
  1.1762 +    const uint32 r = src_argb[2];
  1.1763 +    const uint32 a = src_argb[3];
  1.1764 +    dst_argb[0] = ATTENUATE(b, a);
  1.1765 +    dst_argb[1] = ATTENUATE(g, a);
  1.1766 +    dst_argb[2] = ATTENUATE(r, a);
  1.1767 +    dst_argb[3] = a;
  1.1768 +  }
  1.1769 +}
  1.1770 +#undef ATTENUATE
  1.1771 +
// Divide source RGB by alpha and store to destination.
// b = (b * 255 + (a / 2)) / a;
// g = (g * 255 + (a / 2)) / a;
// r = (r * 255 + (a / 2)) / a;
// Reciprocal method is off by 1 on some values, e.g. 125.
// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
//
// T(a) builds one entry: 0x0100 (1.0 in 8.8) in the upper 16 bits and
// 0x10000 / a in the lower 16 bits. Three entries are written out by hand:
//   [0]   lower half 0      - T(0) would divide by zero.
//   [1]   lower half 0xffff - 0x10000 / 1 does not fit in 16 bits.
//   [255] lower half 0x0100 - T(0xff) would give 0x0101; 0x0100 is exactly
//                             1.0 in 8.8 fixed point.
#define T(a) 0x01000000 + (0x10000 / a)
const uint32 fixed_invtbl8[256] = {
  0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
  T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),
  T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),
  T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),
  T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27),
  T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f),
  T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
  T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f),
  T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47),
  T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f),
  T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57),
  T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f),
  T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67),
  T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
  T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77),
  T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f),
  T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87),
  T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f),
  T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97),
  T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f),
  T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
  T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf),
  T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7),
  T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf),
  T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7),
  T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf),
  T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7),
  T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
  T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7),
  T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef),
  T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7),
  T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x01000100 };
#undef T
  1.1813 +
  1.1814 +void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
  1.1815 +  int i;
  1.1816 +  for (i = 0; i < width; ++i) {
  1.1817 +    uint32 b = src_argb[0];
  1.1818 +    uint32 g = src_argb[1];
  1.1819 +    uint32 r = src_argb[2];
  1.1820 +    const uint32 a = src_argb[3];
  1.1821 +    const uint32 ia = fixed_invtbl8[a] & 0xffff;  // 8.8 fixed point
  1.1822 +    b = (b * ia) >> 8;
  1.1823 +    g = (g * ia) >> 8;
  1.1824 +    r = (r * ia) >> 8;
  1.1825 +    // Clamping should not be necessary but is free in assembly.
  1.1826 +    dst_argb[0] = clamp255(b);
  1.1827 +    dst_argb[1] = clamp255(g);
  1.1828 +    dst_argb[2] = clamp255(r);
  1.1829 +    dst_argb[3] = a;
  1.1830 +    src_argb += 4;
  1.1831 +    dst_argb += 4;
  1.1832 +  }
  1.1833 +}
  1.1834 +
  1.1835 +void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
  1.1836 +                               const int32* previous_cumsum, int width) {
  1.1837 +  int32 row_sum[4] = {0, 0, 0, 0};
  1.1838 +  int x;
  1.1839 +  for (x = 0; x < width; ++x) {
  1.1840 +    row_sum[0] += row[x * 4 + 0];
  1.1841 +    row_sum[1] += row[x * 4 + 1];
  1.1842 +    row_sum[2] += row[x * 4 + 2];
  1.1843 +    row_sum[3] += row[x * 4 + 3];
  1.1844 +    cumsum[x * 4 + 0] = row_sum[0]  + previous_cumsum[x * 4 + 0];
  1.1845 +    cumsum[x * 4 + 1] = row_sum[1]  + previous_cumsum[x * 4 + 1];
  1.1846 +    cumsum[x * 4 + 2] = row_sum[2]  + previous_cumsum[x * 4 + 2];
  1.1847 +    cumsum[x * 4 + 3] = row_sum[3]  + previous_cumsum[x * 4 + 3];
  1.1848 +  }
  1.1849 +}
  1.1850 +
  1.1851 +void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl,
  1.1852 +                                int w, int area, uint8* dst, int count) {
  1.1853 +  float ooa = 1.0f / area;
  1.1854 +  int i;
  1.1855 +  for (i = 0; i < count; ++i) {
  1.1856 +    dst[0] = (uint8)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
  1.1857 +    dst[1] = (uint8)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);
  1.1858 +    dst[2] = (uint8)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa);
  1.1859 +    dst[3] = (uint8)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa);
  1.1860 +    dst += 4;
  1.1861 +    tl += 4;
  1.1862 +    bl += 4;
  1.1863 +  }
  1.1864 +}
  1.1865 +
  1.1866 +// Copy pixels from rotated source to destination row with a slope.
  1.1867 +LIBYUV_API
  1.1868 +void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
  1.1869 +                     uint8* dst_argb, const float* uv_dudv, int width) {
  1.1870 +  int i;
  1.1871 +  // Render a row of pixels from source into a buffer.
  1.1872 +  float uv[2];
  1.1873 +  uv[0] = uv_dudv[0];
  1.1874 +  uv[1] = uv_dudv[1];
  1.1875 +  for (i = 0; i < width; ++i) {
  1.1876 +    int x = (int)(uv[0]);
  1.1877 +    int y = (int)(uv[1]);
  1.1878 +    *(uint32*)(dst_argb) =
  1.1879 +        *(const uint32*)(src_argb + y * src_argb_stride +
  1.1880 +                                         x * 4);
  1.1881 +    dst_argb += 4;
  1.1882 +    uv[0] += uv_dudv[2];
  1.1883 +    uv[1] += uv_dudv[3];
  1.1884 +  }
  1.1885 +}
  1.1886 +
  1.1887 +// Blend 2 rows into 1 for conversions such as I422ToI420.
  1.1888 +void HalfRow_C(const uint8* src_uv, int src_uv_stride,
  1.1889 +               uint8* dst_uv, int pix) {
  1.1890 +  int x;
  1.1891 +  for (x = 0; x < pix; ++x) {
  1.1892 +    dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
  1.1893 +  }
  1.1894 +}
  1.1895 +
  1.1896 +// C version 2x2 -> 2x1.
  1.1897 +void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
  1.1898 +                      ptrdiff_t src_stride,
  1.1899 +                      int width, int source_y_fraction) {
  1.1900 +  int y1_fraction = source_y_fraction;
  1.1901 +  int y0_fraction = 256 - y1_fraction;
  1.1902 +  const uint8* src_ptr1 = src_ptr + src_stride;
  1.1903 +  int x;
  1.1904 +  if (source_y_fraction == 0) {
  1.1905 +    memcpy(dst_ptr, src_ptr, width);
  1.1906 +    return;
  1.1907 +  }
  1.1908 +  if (source_y_fraction == 128) {
  1.1909 +    HalfRow_C(src_ptr, (int)(src_stride), dst_ptr, width);
  1.1910 +    return;
  1.1911 +  }
  1.1912 +  for (x = 0; x < width - 1; x += 2) {
  1.1913 +    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
  1.1914 +    dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
  1.1915 +    src_ptr += 2;
  1.1916 +    src_ptr1 += 2;
  1.1917 +    dst_ptr += 2;
  1.1918 +  }
  1.1919 +  if (width & 1) {
  1.1920 +    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
  1.1921 +  }
  1.1922 +}
  1.1923 +
  1.1924 +// Select 2 channels from ARGB on alternating pixels.  e.g.  BGBGBGBG
  1.1925 +void ARGBToBayerRow_C(const uint8* src_argb,
  1.1926 +                      uint8* dst_bayer, uint32 selector, int pix) {
  1.1927 +  int index0 = selector & 0xff;
  1.1928 +  int index1 = (selector >> 8) & 0xff;
  1.1929 +  // Copy a row of Bayer.
  1.1930 +  int x;
  1.1931 +  for (x = 0; x < pix - 1; x += 2) {
  1.1932 +    dst_bayer[0] = src_argb[index0];
  1.1933 +    dst_bayer[1] = src_argb[index1];
  1.1934 +    src_argb += 8;
  1.1935 +    dst_bayer += 2;
  1.1936 +  }
  1.1937 +  if (pix & 1) {
  1.1938 +    dst_bayer[0] = src_argb[index0];
  1.1939 +  }
  1.1940 +}
  1.1941 +
  1.1942 +// Select G channel from ARGB.  e.g.  GGGGGGGG
  1.1943 +void ARGBToBayerGGRow_C(const uint8* src_argb,
  1.1944 +                        uint8* dst_bayer, uint32 selector, int pix) {
  1.1945 +  // Copy a row of G.
  1.1946 +  int x;
  1.1947 +  for (x = 0; x < pix - 1; x += 2) {
  1.1948 +    dst_bayer[0] = src_argb[1];
  1.1949 +    dst_bayer[1] = src_argb[5];
  1.1950 +    src_argb += 8;
  1.1951 +    dst_bayer += 2;
  1.1952 +  }
  1.1953 +  if (pix & 1) {
  1.1954 +    dst_bayer[0] = src_argb[1];
  1.1955 +  }
  1.1956 +}
  1.1957 +
  1.1958 +// Use first 4 shuffler values to reorder ARGB channels.
  1.1959 +void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
  1.1960 +                      const uint8* shuffler, int pix) {
  1.1961 +  int index0 = shuffler[0];
  1.1962 +  int index1 = shuffler[1];
  1.1963 +  int index2 = shuffler[2];
  1.1964 +  int index3 = shuffler[3];
  1.1965 +  // Shuffle a row of ARGB.
  1.1966 +  int x;
  1.1967 +  for (x = 0; x < pix; ++x) {
  1.1968 +    // To support in-place conversion.
  1.1969 +    uint8 b = src_argb[index0];
  1.1970 +    uint8 g = src_argb[index1];
  1.1971 +    uint8 r = src_argb[index2];
  1.1972 +    uint8 a = src_argb[index3];
  1.1973 +    dst_argb[0] = b;
  1.1974 +    dst_argb[1] = g;
  1.1975 +    dst_argb[2] = r;
  1.1976 +    dst_argb[3] = a;
  1.1977 +    src_argb += 4;
  1.1978 +    dst_argb += 4;
  1.1979 +  }
  1.1980 +}
  1.1981 +
  1.1982 +void I422ToYUY2Row_C(const uint8* src_y,
  1.1983 +                     const uint8* src_u,
  1.1984 +                     const uint8* src_v,
  1.1985 +                     uint8* dst_frame, int width) {
  1.1986 +  int x;
  1.1987 +  for (x = 0; x < width - 1; x += 2) {
  1.1988 +    dst_frame[0] = src_y[0];
  1.1989 +    dst_frame[1] = src_u[0];
  1.1990 +    dst_frame[2] = src_y[1];
  1.1991 +    dst_frame[3] = src_v[0];
  1.1992 +    dst_frame += 4;
  1.1993 +    src_y += 2;
  1.1994 +    src_u += 1;
  1.1995 +    src_v += 1;
  1.1996 +  }
  1.1997 +  if (width & 1) {
  1.1998 +    dst_frame[0] = src_y[0];
  1.1999 +    dst_frame[1] = src_u[0];
  1.2000 +    dst_frame[2] = src_y[0];  // duplicate last y
  1.2001 +    dst_frame[3] = src_v[0];
  1.2002 +  }
  1.2003 +}
  1.2004 +
  1.2005 +void I422ToUYVYRow_C(const uint8* src_y,
  1.2006 +                     const uint8* src_u,
  1.2007 +                     const uint8* src_v,
  1.2008 +                     uint8* dst_frame, int width) {
  1.2009 +  int x;
  1.2010 +  for (x = 0; x < width - 1; x += 2) {
  1.2011 +    dst_frame[0] = src_u[0];
  1.2012 +    dst_frame[1] = src_y[0];
  1.2013 +    dst_frame[2] = src_v[0];
  1.2014 +    dst_frame[3] = src_y[1];
  1.2015 +    dst_frame += 4;
  1.2016 +    src_y += 2;
  1.2017 +    src_u += 1;
  1.2018 +    src_v += 1;
  1.2019 +  }
  1.2020 +  if (width & 1) {
  1.2021 +    dst_frame[0] = src_u[0];
  1.2022 +    dst_frame[1] = src_y[0];
  1.2023 +    dst_frame[2] = src_v[0];
  1.2024 +    dst_frame[3] = src_y[0];  // duplicate last y
  1.2025 +  }
  1.2026 +}
  1.2027 +
  1.2028 +#if !defined(LIBYUV_DISABLE_X86) && defined(HAS_I422TOARGBROW_SSSE3)
  1.2029 +// row_win.cc has asm version, but GCC uses 2 step wrapper.
  1.2030 +#if defined(__x86_64__) || defined(__i386__)
// Two-step I422 -> RGB565 row conversion: I422ToARGBRow_SSSE3 fills a
// temporary row of width * 4 bytes, which ARGBToRGB565Row_SSE2 then converts
// into rgb_buf. The temporary is released before returning.
// NOTE(review): align_buffer_64 / free_aligned_buffer_64 are macros defined
// outside this file section - presumably a 64-byte aligned scratch
// allocation; confirm how allocation failure is handled.
void I422ToRGB565Row_SSSE3(const uint8* src_y,
                           const uint8* src_u,
                           const uint8* src_v,
                           uint8* rgb_buf,
                           int width) {
  // Allocate a row of ARGB.
  align_buffer_64(row, width * 4);
  I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
  ARGBToRGB565Row_SSE2(row, rgb_buf, width);
  free_aligned_buffer_64(row);
}
  1.2042 +#endif  // defined(__x86_64__) || defined(__i386__)
  1.2043 +
  1.2044 +#if defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
// Two-step I422 -> ARGB1555 row conversion: I422ToARGBRow_SSSE3 fills a
// temporary row of width * 4 bytes, which ARGBToARGB1555Row_SSE2 then
// converts into rgb_buf. The temporary is released before returning.
// NOTE(review): align_buffer_64 / free_aligned_buffer_64 are macros defined
// outside this file section; confirm how allocation failure is handled.
void I422ToARGB1555Row_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
                             uint8* rgb_buf,
                             int width) {
  // Allocate a row of ARGB.
  align_buffer_64(row, width * 4);
  I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
  ARGBToARGB1555Row_SSE2(row, rgb_buf, width);
  free_aligned_buffer_64(row);
}
  1.2056 +
// Two-step I422 -> ARGB4444 row conversion: I422ToARGBRow_SSSE3 fills a
// temporary row of width * 4 bytes, which ARGBToARGB4444Row_SSE2 then
// converts into rgb_buf. The temporary is released before returning.
// NOTE(review): align_buffer_64 / free_aligned_buffer_64 are macros defined
// outside this file section; confirm how allocation failure is handled.
void I422ToARGB4444Row_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
                             uint8* rgb_buf,
                             int width) {
  // Allocate a row of ARGB.
  align_buffer_64(row, width * 4);
  I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width);
  ARGBToARGB4444Row_SSE2(row, rgb_buf, width);
  free_aligned_buffer_64(row);
}
  1.2068 +
// Two-step NV12 -> RGB565 row conversion: NV12ToARGBRow_SSSE3 fills a
// temporary row of width * 4 bytes, which ARGBToRGB565Row_SSE2 then converts
// into dst_rgb565. The temporary is released before returning.
// NOTE(review): align_buffer_64 / free_aligned_buffer_64 are macros defined
// outside this file section; confirm how allocation failure is handled.
void NV12ToRGB565Row_SSSE3(const uint8* src_y,
                           const uint8* src_uv,
                           uint8* dst_rgb565,
                           int width) {
  // Allocate a row of ARGB.
  align_buffer_64(row, width * 4);
  NV12ToARGBRow_SSSE3(src_y, src_uv, row, width);
  ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
  free_aligned_buffer_64(row);
}
  1.2079 +
// Two-step NV21 -> RGB565 row conversion: NV21ToARGBRow_SSSE3 fills a
// temporary row of width * 4 bytes, which ARGBToRGB565Row_SSE2 then converts
// into dst_rgb565. The temporary is released before returning.
// NOTE(review): align_buffer_64 / free_aligned_buffer_64 are macros defined
// outside this file section; confirm how allocation failure is handled.
void NV21ToRGB565Row_SSSE3(const uint8* src_y,
                           const uint8* src_vu,
                           uint8* dst_rgb565,
                           int width) {
  // Allocate a row of ARGB.
  align_buffer_64(row, width * 4);
  NV21ToARGBRow_SSSE3(src_y, src_vu, row, width);
  ARGBToRGB565Row_SSE2(row, dst_rgb565, width);
  free_aligned_buffer_64(row);
}
  1.2090 +
// Two-step YUY2 -> ARGB row conversion: the packed row is first split into
// temporary Y, U and V rows, which I422ToARGBRow_SSSE3 then converts into
// dst_argb.
// One scratch buffer holds all three rows. With W = width rounded up to a
// multiple of 64, the buffer is 2 * W bytes: Y at offset 0, U at offset W and
// V at offset W + W / 2.
// NOTE(review): align_buffer_64 / free_aligned_buffer_64 are macros defined
// outside this file section; confirm how allocation failure is handled.
void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
                         uint8* dst_argb,
                         int width) {
  // Allocate rows of Y, U and V.
  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
  uint8* row_u = row_y + ((width + 63) & ~63);
  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
  YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, width);
  YUY2ToYRow_SSE2(src_yuy2, row_y, width);
  I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
  free_aligned_buffer_64(row_y);
}
  1.2103 +
// Same two-step YUY2 -> ARGB conversion as YUY2ToARGBRow_SSSE3, but built on
// the *_Unaligned_* row functions.
// One scratch buffer holds all three rows. With W = width rounded up to a
// multiple of 64, the buffer is 2 * W bytes: Y at offset 0, U at offset W and
// V at offset W + W / 2.
// NOTE(review): align_buffer_64 / free_aligned_buffer_64 are macros defined
// outside this file section; confirm how allocation failure is handled.
void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
                                   uint8* dst_argb,
                                   int width) {
  // Allocate rows of Y, U and V.
  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
  uint8* row_u = row_y + ((width + 63) & ~63);
  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
  YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, row_u, row_v, width);
  YUY2ToYRow_Unaligned_SSE2(src_yuy2, row_y, width);
  I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
  free_aligned_buffer_64(row_y);
}
  1.2116 +
// Two-step UYVY -> ARGB row conversion: the packed row is first split into
// temporary Y, U and V rows, which I422ToARGBRow_SSSE3 then converts into
// dst_argb.
// One scratch buffer holds all three rows. With W = width rounded up to a
// multiple of 64, the buffer is 2 * W bytes: Y at offset 0, U at offset W and
// V at offset W + W / 2.
// NOTE(review): align_buffer_64 / free_aligned_buffer_64 are macros defined
// outside this file section; confirm how allocation failure is handled.
void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
                         uint8* dst_argb,
                         int width) {
  // Allocate rows of Y, U and V.
  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
  uint8* row_u = row_y + ((width + 63) & ~63);
  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
  UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, width);
  UYVYToYRow_SSE2(src_uyvy, row_y, width);
  I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width);
  free_aligned_buffer_64(row_y);
}
  1.2129 +
// Same two-step UYVY -> ARGB conversion as UYVYToARGBRow_SSSE3, but built on
// the *_Unaligned_* row functions.
// One scratch buffer holds all three rows. With W = width rounded up to a
// multiple of 64, the buffer is 2 * W bytes: Y at offset 0, U at offset W and
// V at offset W + W / 2.
// NOTE(review): align_buffer_64 / free_aligned_buffer_64 are macros defined
// outside this file section; confirm how allocation failure is handled.
void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
                                   uint8* dst_argb,
                                   int width) {
  // Allocate rows of Y, U and V.
  align_buffer_64(row_y, ((width + 63) & ~63) * 2);
  uint8* row_u = row_y + ((width + 63) & ~63);
  uint8* row_v = row_u + ((width + 63) & ~63) / 2;
  UYVYToUV422Row_Unaligned_SSE2(src_uyvy, row_u, row_v, width);
  UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width);
  I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
  free_aligned_buffer_64(row_y);
}
  1.2142 +
  1.2143 +#endif  // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
  1.2144 +#endif  // !defined(LIBYUV_DISABLE_X86)
  1.2145 +
  1.2146 +void ARGBPolynomialRow_C(const uint8* src_argb,
  1.2147 +                         uint8* dst_argb, const float* poly,
  1.2148 +                         int width) {
  1.2149 +  int i;
  1.2150 +  for (i = 0; i < width; ++i) {
  1.2151 +    float b = (float)(src_argb[0]);
  1.2152 +    float g = (float)(src_argb[1]);
  1.2153 +    float r = (float)(src_argb[2]);
  1.2154 +    float a = (float)(src_argb[3]);
  1.2155 +    float b2 = b * b;
  1.2156 +    float g2 = g * g;
  1.2157 +    float r2 = r * r;
  1.2158 +    float a2 = a * a;
  1.2159 +    float db = poly[0] + poly[4] * b;
  1.2160 +    float dg = poly[1] + poly[5] * g;
  1.2161 +    float dr = poly[2] + poly[6] * r;
  1.2162 +    float da = poly[3] + poly[7] * a;
  1.2163 +    float b3 = b2 * b;
  1.2164 +    float g3 = g2 * g;
  1.2165 +    float r3 = r2 * r;
  1.2166 +    float a3 = a2 * a;
  1.2167 +    db += poly[8] * b2;
  1.2168 +    dg += poly[9] * g2;
  1.2169 +    dr += poly[10] * r2;
  1.2170 +    da += poly[11] * a2;
  1.2171 +    db += poly[12] * b3;
  1.2172 +    dg += poly[13] * g3;
  1.2173 +    dr += poly[14] * r3;
  1.2174 +    da += poly[15] * a3;
  1.2175 +
  1.2176 +    dst_argb[0] = Clamp((int32)(db));
  1.2177 +    dst_argb[1] = Clamp((int32)(dg));
  1.2178 +    dst_argb[2] = Clamp((int32)(dr));
  1.2179 +    dst_argb[3] = Clamp((int32)(da));
  1.2180 +    src_argb += 4;
  1.2181 +    dst_argb += 4;
  1.2182 +  }
  1.2183 +}
  1.2184 +
  1.2185 +void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
  1.2186 +                             const uint8* luma, uint32 lumacoeff) {
  1.2187 +  uint32 bc = lumacoeff & 0xff;
  1.2188 +  uint32 gc = (lumacoeff >> 8) & 0xff;
  1.2189 +  uint32 rc = (lumacoeff >> 16) & 0xff;
  1.2190 +
  1.2191 +  int i;
  1.2192 +  for (i = 0; i < width - 1; i += 2) {
  1.2193 +    // Luminance in rows, color values in columns.
  1.2194 +    const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
  1.2195 +                           src_argb[2] * rc) & 0x7F00u) + luma;
  1.2196 +    const uint8* luma1;
  1.2197 +    dst_argb[0] = luma0[src_argb[0]];
  1.2198 +    dst_argb[1] = luma0[src_argb[1]];
  1.2199 +    dst_argb[2] = luma0[src_argb[2]];
  1.2200 +    dst_argb[3] = src_argb[3];
  1.2201 +    luma1 = ((src_argb[4] * bc + src_argb[5] * gc +
  1.2202 +              src_argb[6] * rc) & 0x7F00u) + luma;
  1.2203 +    dst_argb[4] = luma1[src_argb[4]];
  1.2204 +    dst_argb[5] = luma1[src_argb[5]];
  1.2205 +    dst_argb[6] = luma1[src_argb[6]];
  1.2206 +    dst_argb[7] = src_argb[7];
  1.2207 +    src_argb += 8;
  1.2208 +    dst_argb += 8;
  1.2209 +  }
  1.2210 +  if (width & 1) {
  1.2211 +    // Luminance in rows, color values in columns.
  1.2212 +    const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
  1.2213 +                           src_argb[2] * rc) & 0x7F00u) + luma;
  1.2214 +    dst_argb[0] = luma0[src_argb[0]];
  1.2215 +    dst_argb[1] = luma0[src_argb[1]];
  1.2216 +    dst_argb[2] = luma0[src_argb[2]];
  1.2217 +    dst_argb[3] = src_argb[3];
  1.2218 +  }
  1.2219 +}
  1.2220 +
  1.2221 +void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
  1.2222 +  int i;
  1.2223 +  for (i = 0; i < width - 1; i += 2) {
  1.2224 +    dst[3] = src[3];
  1.2225 +    dst[7] = src[7];
  1.2226 +    dst += 8;
  1.2227 +    src += 8;
  1.2228 +  }
  1.2229 +  if (width & 1) {
  1.2230 +    dst[3] = src[3];
  1.2231 +  }
  1.2232 +}
  1.2233 +
  1.2234 +void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
  1.2235 +  int i;
  1.2236 +  for (i = 0; i < width - 1; i += 2) {
  1.2237 +    dst[3] = src[0];
  1.2238 +    dst[7] = src[1];
  1.2239 +    dst += 8;
  1.2240 +    src += 2;
  1.2241 +  }
  1.2242 +  if (width & 1) {
  1.2243 +    dst[3] = src[0];
  1.2244 +  }
  1.2245 +}
  1.2246 +
  1.2247 +#ifdef __cplusplus
  1.2248 +}  // extern "C"
  1.2249 +}  // namespace libyuv
  1.2250 +#endif

mercurial