1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libyuv/source/row_common.cc Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,2247 @@ 1.4 +/* 1.5 + * Copyright 2011 The LibYuv Project Authors. All rights reserved. 1.6 + * 1.7 + * Use of this source code is governed by a BSD-style license 1.8 + * that can be found in the LICENSE file in the root of the source 1.9 + * tree. An additional intellectual property rights grant can be found 1.10 + * in the file PATENTS. All contributing project authors may 1.11 + * be found in the AUTHORS file in the root of the source tree. 1.12 + */ 1.13 + 1.14 +#include "libyuv/row.h" 1.15 + 1.16 +#include <string.h> // For memcpy and memset. 1.17 + 1.18 +#include "libyuv/basic_types.h" 1.19 + 1.20 +#ifdef __cplusplus 1.21 +namespace libyuv { 1.22 +extern "C" { 1.23 +#endif 1.24 + 1.25 +// llvm x86 is poor at ternary operator, so use branchless min/max. 1.26 + 1.27 +#define USE_BRANCHLESS 1 1.28 +#if USE_BRANCHLESS 1.29 +static __inline int32 clamp0(int32 v) { 1.30 + return ((-(v) >> 31) & (v)); 1.31 +} 1.32 + 1.33 +static __inline int32 clamp255(int32 v) { 1.34 + return (((255 - (v)) >> 31) | (v)) & 255; 1.35 +} 1.36 + 1.37 +static __inline uint32 Clamp(int32 val) { 1.38 + int v = clamp0(val); 1.39 + return (uint32)(clamp255(v)); 1.40 +} 1.41 + 1.42 +static __inline uint32 Abs(int32 v) { 1.43 + int m = v >> 31; 1.44 + return (v + m) ^ m; 1.45 +} 1.46 +#else // USE_BRANCHLESS 1.47 +static __inline int32 clamp0(int32 v) { 1.48 + return (v < 0) ? 0 : v; 1.49 +} 1.50 + 1.51 +static __inline int32 clamp255(int32 v) { 1.52 + return (v > 255) ? 255 : v; 1.53 +} 1.54 + 1.55 +static __inline uint32 Clamp(int32 val) { 1.56 + int v = clamp0(val); 1.57 + return (uint32)(clamp255(v)); 1.58 +} 1.59 + 1.60 +static __inline uint32 Abs(int32 v) { 1.61 + return (v < 0) ? -v : v; 1.62 +} 1.63 +#endif // USE_BRANCHLESS 1.64 + 1.65 +#ifdef LIBYUV_LITTLE_ENDIAN 1.66 +#define WRITEWORD(p, v) *(uint32*)(p) = v 1.67 +#else 1.68 +static inline void WRITEWORD(uint8* p, uint32 v) { 1.69 + p[0] = (uint8)(v & 255); 1.70 + p[1] = (uint8)((v >> 8) & 255); 1.71 + p[2] = (uint8)((v >> 16) & 255); 1.72 + p[3] = (uint8)((v >> 24) & 255); 1.73 +} 1.74 +#endif 1.75 + 1.76 +void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) { 1.77 + int x; 1.78 + for (x = 0; x < width; ++x) { 1.79 + uint8 b = src_rgb24[0]; 1.80 + uint8 g = src_rgb24[1]; 1.81 + uint8 r = src_rgb24[2]; 1.82 + dst_argb[0] = b; 1.83 + dst_argb[1] = g; 1.84 + dst_argb[2] = r; 1.85 + dst_argb[3] = 255u; 1.86 + dst_argb += 4; 1.87 + src_rgb24 += 3; 1.88 + } 1.89 +} 1.90 + 1.91 +void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) { 1.92 + int x; 1.93 + for (x = 0; x < width; ++x) { 1.94 + uint8 r = src_raw[0]; 1.95 + uint8 g = src_raw[1]; 1.96 + uint8 b = src_raw[2]; 1.97 + dst_argb[0] = b; 1.98 + dst_argb[1] = g; 1.99 + dst_argb[2] = r; 1.100 + dst_argb[3] = 255u; 1.101 + dst_argb += 4; 1.102 + src_raw += 3; 1.103 + } 1.104 +} 1.105 + 1.106 +void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) { 1.107 + int x; 1.108 + for (x = 0; x < width; ++x) { 1.109 + uint8 b = src_rgb565[0] & 0x1f; 1.110 + uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); 1.111 + uint8 r = src_rgb565[1] >> 3; 1.112 + dst_argb[0] = (b << 3) | (b >> 2); 1.113 + dst_argb[1] = (g << 2) | (g >> 4); 1.114 + dst_argb[2] = (r << 3) | (r >> 2); 1.115 + dst_argb[3] = 255u; 1.116 + dst_argb += 4; 1.117 + src_rgb565 += 2; 1.118 + } 1.119 +} 1.120 + 1.121 +void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb, 1.122 + int width) { 1.123 + int x; 1.124 + for (x = 0; x < width; ++x) { 1.125 + uint8 b = src_argb1555[0] & 0x1f; 1.126 + uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); 1.127 + uint8 r = (src_argb1555[1] & 0x7c) >> 2; 1.128 + uint8 a = src_argb1555[1] >> 7; 1.129 + dst_argb[0] = (b << 3) | (b >> 2); 1.130 + dst_argb[1] = (g << 3) | (g >> 2); 1.131 + dst_argb[2] = (r << 3) | (r >> 2); 1.132 + dst_argb[3] = -a; 1.133 + dst_argb += 4; 1.134 + src_argb1555 += 2; 1.135 + } 1.136 +} 1.137 + 1.138 +void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb, 1.139 + int width) { 1.140 + int x; 1.141 + for (x = 0; x < width; ++x) { 1.142 + uint8 b = src_argb4444[0] & 0x0f; 1.143 + uint8 g = src_argb4444[0] >> 4; 1.144 + uint8 r = src_argb4444[1] & 0x0f; 1.145 + uint8 a = src_argb4444[1] >> 4; 1.146 + dst_argb[0] = (b << 4) | b; 1.147 + dst_argb[1] = (g << 4) | g; 1.148 + dst_argb[2] = (r << 4) | r; 1.149 + dst_argb[3] = (a << 4) | a; 1.150 + dst_argb += 4; 1.151 + src_argb4444 += 2; 1.152 + } 1.153 +} 1.154 + 1.155 +void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { 1.156 + int x; 1.157 + for (x = 0; x < width; ++x) { 1.158 + uint8 b = src_argb[0]; 1.159 + uint8 g = src_argb[1]; 1.160 + uint8 r = src_argb[2]; 1.161 + dst_rgb[0] = b; 1.162 + dst_rgb[1] = g; 1.163 + dst_rgb[2] = r; 1.164 + dst_rgb += 3; 1.165 + src_argb += 4; 1.166 + } 1.167 +} 1.168 + 1.169 +void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) { 1.170 + int x; 1.171 + for (x = 0; x < width; ++x) { 1.172 + uint8 b = src_argb[0]; 1.173 + uint8 g = src_argb[1]; 1.174 + uint8 r = src_argb[2]; 1.175 + dst_rgb[0] = r; 1.176 + dst_rgb[1] = g; 1.177 + dst_rgb[2] = b; 1.178 + dst_rgb += 3; 1.179 + src_argb += 4; 1.180 + } 1.181 +} 1.182 + 1.183 +void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { 1.184 + int x; 1.185 + for (x = 0; x < width - 1; x += 2) { 1.186 + uint8 b0 = src_argb[0] >> 3; 1.187 + uint8 g0 = src_argb[1] >> 2; 1.188 + uint8 r0 = src_argb[2] >> 3; 1.189 + uint8 b1 = src_argb[4] >> 3; 1.190 + uint8 g1 = src_argb[5] >> 2; 1.191 + uint8 r1 = src_argb[6] >> 3; 1.192 + WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | 1.193 + (b1 << 16) | (g1 << 21) | (r1 << 27)); 1.194 + dst_rgb += 4; 1.195 + src_argb += 8; 1.196 + } 1.197 + if (width & 1) { 1.198 + uint8 b0 = src_argb[0] >> 3; 1.199 + uint8 g0 = src_argb[1] >> 2; 1.200 + uint8 r0 = src_argb[2] >> 3; 1.201 + *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); 1.202 + } 1.203 +} 1.204 + 1.205 +void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { 1.206 + int x; 1.207 + for (x = 0; x < width - 1; x += 2) { 1.208 + uint8 b0 = src_argb[0] >> 3; 1.209 + uint8 g0 = src_argb[1] >> 3; 1.210 + uint8 r0 = src_argb[2] >> 3; 1.211 + uint8 a0 = src_argb[3] >> 7; 1.212 + uint8 b1 = src_argb[4] >> 3; 1.213 + uint8 g1 = src_argb[5] >> 3; 1.214 + uint8 r1 = src_argb[6] >> 3; 1.215 + uint8 a1 = src_argb[7] >> 7; 1.216 + *(uint32*)(dst_rgb) = 1.217 + b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) | 1.218 + (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31); 1.219 + dst_rgb += 4; 1.220 + src_argb += 8; 1.221 + } 1.222 + if (width & 1) { 1.223 + uint8 b0 = src_argb[0] >> 3; 1.224 + uint8 g0 = src_argb[1] >> 3; 1.225 + uint8 r0 = src_argb[2] >> 3; 1.226 + uint8 a0 = src_argb[3] >> 7; 1.227 + *(uint16*)(dst_rgb) = 1.228 + b0 | (g0 << 5) | (r0 << 10) | (a0 << 15); 1.229 + } 1.230 +} 1.231 + 1.232 +void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { 1.233 + int x; 1.234 + for (x = 0; x < width - 1; x += 2) { 1.235 + uint8 b0 = src_argb[0] >> 4; 1.236 + uint8 g0 = src_argb[1] >> 4; 1.237 + uint8 r0 = src_argb[2] >> 4; 1.238 + uint8 a0 = src_argb[3] >> 4; 1.239 + uint8 b1 = src_argb[4] >> 4; 1.240 + uint8 g1 = src_argb[5] >> 4; 1.241 + uint8 r1 = src_argb[6] >> 4; 1.242 + uint8 a1 = src_argb[7] >> 4; 1.243 + *(uint32*)(dst_rgb) = 1.244 + b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) | 1.245 + (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28); 1.246 + dst_rgb += 4; 1.247 + src_argb += 8; 1.248 + } 1.249 + if (width & 1) { 1.250 + uint8 b0 = src_argb[0] >> 4; 1.251 + uint8 g0 = src_argb[1] >> 4; 1.252 + uint8 r0 = src_argb[2] >> 4; 1.253 + uint8 a0 = src_argb[3] >> 4; 1.254 + *(uint16*)(dst_rgb) = 1.255 + b0 | (g0 << 4) | (r0 << 8) | (a0 << 12); 1.256 + } 1.257 +} 1.258 + 1.259 +static __inline int RGBToY(uint8 r, uint8 g, uint8 b) { 1.260 + return (66 * r + 129 * g + 25 * b + 0x1080) >> 8; 1.261 +} 1.262 + 1.263 +static __inline int RGBToU(uint8 r, uint8 g, uint8 b) { 1.264 + return (112 * b - 74 * g - 38 * r + 0x8080) >> 8; 1.265 +} 1.266 +static __inline int RGBToV(uint8 r, uint8 g, uint8 b) { 1.267 + return (112 * r - 94 * g - 18 * b + 0x8080) >> 8; 1.268 +} 1.269 + 1.270 +#define MAKEROWY(NAME, R, G, B, BPP) \ 1.271 +void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \ 1.272 + int x; \ 1.273 + for (x = 0; x < width; ++x) { \ 1.274 + dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \ 1.275 + src_argb0 += BPP; \ 1.276 + dst_y += 1; \ 1.277 + } \ 1.278 +} \ 1.279 +void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \ 1.280 + uint8* dst_u, uint8* dst_v, int width) { \ 1.281 + const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \ 1.282 + int x; \ 1.283 + for (x = 0; x < width - 1; x += 2) { \ 1.284 + uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] + \ 1.285 + src_rgb1[B] + src_rgb1[B + BPP]) >> 2; \ 1.286 + uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] + \ 1.287 + src_rgb1[G] + src_rgb1[G + BPP]) >> 2; \ 1.288 + uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] + \ 1.289 + src_rgb1[R] + src_rgb1[R + BPP]) >> 2; \ 1.290 + dst_u[0] = RGBToU(ar, ag, ab); \ 1.291 + dst_v[0] = RGBToV(ar, ag, ab); \ 1.292 + src_rgb0 += BPP * 2; \ 1.293 + src_rgb1 += BPP * 2; \ 1.294 + dst_u += 1; \ 1.295 + dst_v += 1; \ 1.296 + } \ 1.297 + if (width & 1) { \ 1.298 + uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \ 1.299 + uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \ 1.300 + uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \ 1.301 + dst_u[0] = RGBToU(ar, ag, ab); \ 1.302 + dst_v[0] = RGBToV(ar, ag, ab); \ 1.303 + } \ 1.304 +} 1.305 + 1.306 +MAKEROWY(ARGB, 2, 1, 0, 4) 1.307 +MAKEROWY(BGRA, 1, 2, 3, 4) 1.308 +MAKEROWY(ABGR, 0, 1, 2, 4) 1.309 +MAKEROWY(RGBA, 3, 2, 1, 4) 1.310 +MAKEROWY(RGB24, 2, 1, 0, 3) 1.311 +MAKEROWY(RAW, 0, 1, 2, 3) 1.312 +#undef MAKEROWY 1.313 + 1.314 +// JPeg uses a variation on BT.601-1 full range 1.315 +// y = 0.29900 * r + 0.58700 * g + 0.11400 * b 1.316 +// u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center 1.317 +// v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center 1.318 +// BT.601 Mpeg range uses: 1.319 +// b 0.1016 * 255 = 25.908 = 25 1.320 +// g 0.5078 * 255 = 129.489 = 129 1.321 +// r 0.2578 * 255 = 65.739 = 66 1.322 +// JPeg 8 bit Y (not used): 1.323 +// b 0.11400 * 256 = 29.184 = 29 1.324 +// g 0.58700 * 256 = 150.272 = 150 1.325 +// r 0.29900 * 256 = 76.544 = 77 1.326 +// JPeg 7 bit Y: 1.327 +// b 0.11400 * 128 = 14.592 = 15 1.328 +// g 0.58700 * 128 = 75.136 = 75 1.329 +// r 0.29900 * 128 = 38.272 = 38 1.330 +// JPeg 8 bit U: 1.331 +// b 0.50000 * 255 = 127.5 = 127 1.332 +// g -0.33126 * 255 = -84.4713 = -84 1.333 +// r -0.16874 * 255 = -43.0287 = -43 1.334 +// JPeg 8 bit V: 1.335 +// b -0.08131 * 255 = -20.73405 = -20 1.336 +// g -0.41869 * 255 = -106.76595 = -107 1.337 +// r 0.50000 * 255 = 127.5 = 127 1.338 + 1.339 +static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) { 1.340 + return (38 * r + 75 * g + 15 * b + 64) >> 7; 1.341 +} 1.342 + 1.343 +static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) { 1.344 + return (127 * b - 84 * g - 43 * r + 0x8080) >> 8; 1.345 +} 1.346 +static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) { 1.347 + return (127 * r - 107 * g - 20 * b + 0x8080) >> 8; 1.348 +} 1.349 + 1.350 +#define AVGB(a, b) (((a) + (b) + 1) >> 1) 1.351 + 1.352 +#define MAKEROWYJ(NAME, R, G, B, BPP) \ 1.353 +void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \ 1.354 + int x; \ 1.355 + for (x = 0; x < width; ++x) { \ 1.356 + dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \ 1.357 + src_argb0 += BPP; \ 1.358 + dst_y += 1; \ 1.359 + } \ 1.360 +} \ 1.361 +void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb, \ 1.362 + uint8* dst_u, uint8* dst_v, int width) { \ 1.363 + const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \ 1.364 + int x; \ 1.365 + for (x = 0; x < width - 1; x += 2) { \ 1.366 + uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \ 1.367 + AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \ 1.368 + uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \ 1.369 + AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \ 1.370 + uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \ 1.371 + AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \ 1.372 + dst_u[0] = RGBToUJ(ar, ag, ab); \ 1.373 + dst_v[0] = RGBToVJ(ar, ag, ab); \ 1.374 + src_rgb0 += BPP * 2; \ 1.375 + src_rgb1 += BPP * 2; \ 1.376 + dst_u += 1; \ 1.377 + dst_v += 1; \ 1.378 + } \ 1.379 + if (width & 1) { \ 1.380 + uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]); \ 1.381 + uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]); \ 1.382 + uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]); \ 1.383 + dst_u[0] = RGBToUJ(ar, ag, ab); \ 1.384 + dst_v[0] = RGBToVJ(ar, ag, ab); \ 1.385 + } \ 1.386 +} 1.387 + 1.388 +MAKEROWYJ(ARGB, 2, 1, 0, 4) 1.389 +#undef MAKEROWYJ 1.390 + 1.391 +void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) { 1.392 + int x; 1.393 + for (x = 0; x < width; ++x) { 1.394 + uint8 b = src_rgb565[0] & 0x1f; 1.395 + uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); 1.396 + uint8 r = src_rgb565[1] >> 3; 1.397 + b = (b << 3) | (b >> 2); 1.398 + g = (g << 2) | (g >> 4); 1.399 + r = (r << 3) | (r >> 2); 1.400 + dst_y[0] = RGBToY(r, g, b); 1.401 + src_rgb565 += 2; 1.402 + dst_y += 1; 1.403 + } 1.404 +} 1.405 + 1.406 +void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) { 1.407 + int x; 1.408 + for (x = 0; x < width; ++x) { 1.409 + uint8 b = src_argb1555[0] & 0x1f; 1.410 + uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); 1.411 + uint8 r = (src_argb1555[1] & 0x7c) >> 2; 1.412 + b = (b << 3) | (b >> 2); 1.413 + g = (g << 3) | (g >> 2); 1.414 + r = (r << 3) | (r >> 2); 1.415 + dst_y[0] = RGBToY(r, g, b); 1.416 + src_argb1555 += 2; 1.417 + dst_y += 1; 1.418 + } 1.419 +} 1.420 + 1.421 +void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) { 1.422 + int x; 1.423 + for (x = 0; x < width; ++x) { 1.424 + uint8 b = src_argb4444[0] & 0x0f; 1.425 + uint8 g = src_argb4444[0] >> 4; 1.426 + uint8 r = src_argb4444[1] & 0x0f; 1.427 + b = (b << 4) | b; 1.428 + g = (g << 4) | g; 1.429 + r = (r << 4) | r; 1.430 + dst_y[0] = RGBToY(r, g, b); 1.431 + src_argb4444 += 2; 1.432 + dst_y += 1; 1.433 + } 1.434 +} 1.435 + 1.436 +void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565, 1.437 + uint8* dst_u, uint8* dst_v, int width) { 1.438 + const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565; 1.439 + int x; 1.440 + for (x = 0; x < width - 1; x += 2) { 1.441 + uint8 b0 = src_rgb565[0] & 0x1f; 1.442 + uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); 1.443 + uint8 r0 = src_rgb565[1] >> 3; 1.444 + uint8 b1 = src_rgb565[2] & 0x1f; 1.445 + uint8 g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3); 1.446 + uint8 r1 = src_rgb565[3] >> 3; 1.447 + uint8 b2 = next_rgb565[0] & 0x1f; 1.448 + uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); 1.449 + uint8 r2 = next_rgb565[1] >> 3; 1.450 + uint8 b3 = next_rgb565[2] & 0x1f; 1.451 + uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3); 1.452 + uint8 r3 = next_rgb565[3] >> 3; 1.453 + uint8 b = (b0 + b1 + b2 + b3); // 565 * 4 = 787. 1.454 + uint8 g = (g0 + g1 + g2 + g3); 1.455 + uint8 r = (r0 + r1 + r2 + r3); 1.456 + b = (b << 1) | (b >> 6); // 787 -> 888. 1.457 + r = (r << 1) | (r >> 6); 1.458 + dst_u[0] = RGBToU(r, g, b); 1.459 + dst_v[0] = RGBToV(r, g, b); 1.460 + src_rgb565 += 4; 1.461 + next_rgb565 += 4; 1.462 + dst_u += 1; 1.463 + dst_v += 1; 1.464 + } 1.465 + if (width & 1) { 1.466 + uint8 b0 = src_rgb565[0] & 0x1f; 1.467 + uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); 1.468 + uint8 r0 = src_rgb565[1] >> 3; 1.469 + uint8 b2 = next_rgb565[0] & 0x1f; 1.470 + uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); 1.471 + uint8 r2 = next_rgb565[1] >> 3; 1.472 + uint8 b = (b0 + b2); // 565 * 2 = 676. 1.473 + uint8 g = (g0 + g2); 1.474 + uint8 r = (r0 + r2); 1.475 + b = (b << 2) | (b >> 4); // 676 -> 888 1.476 + g = (g << 1) | (g >> 6); 1.477 + r = (r << 2) | (r >> 4); 1.478 + dst_u[0] = RGBToU(r, g, b); 1.479 + dst_v[0] = RGBToV(r, g, b); 1.480 + } 1.481 +} 1.482 + 1.483 +void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555, 1.484 + uint8* dst_u, uint8* dst_v, int width) { 1.485 + const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555; 1.486 + int x; 1.487 + for (x = 0; x < width - 1; x += 2) { 1.488 + uint8 b0 = src_argb1555[0] & 0x1f; 1.489 + uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); 1.490 + uint8 r0 = (src_argb1555[1] & 0x7c) >> 2; 1.491 + uint8 b1 = src_argb1555[2] & 0x1f; 1.492 + uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3); 1.493 + uint8 r1 = (src_argb1555[3] & 0x7c) >> 2; 1.494 + uint8 b2 = next_argb1555[0] & 0x1f; 1.495 + uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); 1.496 + uint8 r2 = (next_argb1555[1] & 0x7c) >> 2; 1.497 + uint8 b3 = next_argb1555[2] & 0x1f; 1.498 + uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3); 1.499 + uint8 r3 = (next_argb1555[3] & 0x7c) >> 2; 1.500 + uint8 b = (b0 + b1 + b2 + b3); // 555 * 4 = 777. 1.501 + uint8 g = (g0 + g1 + g2 + g3); 1.502 + uint8 r = (r0 + r1 + r2 + r3); 1.503 + b = (b << 1) | (b >> 6); // 777 -> 888. 1.504 + g = (g << 1) | (g >> 6); 1.505 + r = (r << 1) | (r >> 6); 1.506 + dst_u[0] = RGBToU(r, g, b); 1.507 + dst_v[0] = RGBToV(r, g, b); 1.508 + src_argb1555 += 4; 1.509 + next_argb1555 += 4; 1.510 + dst_u += 1; 1.511 + dst_v += 1; 1.512 + } 1.513 + if (width & 1) { 1.514 + uint8 b0 = src_argb1555[0] & 0x1f; 1.515 + uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); 1.516 + uint8 r0 = (src_argb1555[1] & 0x7c) >> 2; 1.517 + uint8 b2 = next_argb1555[0] & 0x1f; 1.518 + uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); 1.519 + uint8 r2 = next_argb1555[1] >> 3; 1.520 + uint8 b = (b0 + b2); // 555 * 2 = 666. 1.521 + uint8 g = (g0 + g2); 1.522 + uint8 r = (r0 + r2); 1.523 + b = (b << 2) | (b >> 4); // 666 -> 888. 1.524 + g = (g << 2) | (g >> 4); 1.525 + r = (r << 2) | (r >> 4); 1.526 + dst_u[0] = RGBToU(r, g, b); 1.527 + dst_v[0] = RGBToV(r, g, b); 1.528 + } 1.529 +} 1.530 + 1.531 +void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444, 1.532 + uint8* dst_u, uint8* dst_v, int width) { 1.533 + const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444; 1.534 + int x; 1.535 + for (x = 0; x < width - 1; x += 2) { 1.536 + uint8 b0 = src_argb4444[0] & 0x0f; 1.537 + uint8 g0 = src_argb4444[0] >> 4; 1.538 + uint8 r0 = src_argb4444[1] & 0x0f; 1.539 + uint8 b1 = src_argb4444[2] & 0x0f; 1.540 + uint8 g1 = src_argb4444[2] >> 4; 1.541 + uint8 r1 = src_argb4444[3] & 0x0f; 1.542 + uint8 b2 = next_argb4444[0] & 0x0f; 1.543 + uint8 g2 = next_argb4444[0] >> 4; 1.544 + uint8 r2 = next_argb4444[1] & 0x0f; 1.545 + uint8 b3 = next_argb4444[2] & 0x0f; 1.546 + uint8 g3 = next_argb4444[2] >> 4; 1.547 + uint8 r3 = next_argb4444[3] & 0x0f; 1.548 + uint8 b = (b0 + b1 + b2 + b3); // 444 * 4 = 666. 1.549 + uint8 g = (g0 + g1 + g2 + g3); 1.550 + uint8 r = (r0 + r1 + r2 + r3); 1.551 + b = (b << 2) | (b >> 4); // 666 -> 888. 1.552 + g = (g << 2) | (g >> 4); 1.553 + r = (r << 2) | (r >> 4); 1.554 + dst_u[0] = RGBToU(r, g, b); 1.555 + dst_v[0] = RGBToV(r, g, b); 1.556 + src_argb4444 += 4; 1.557 + next_argb4444 += 4; 1.558 + dst_u += 1; 1.559 + dst_v += 1; 1.560 + } 1.561 + if (width & 1) { 1.562 + uint8 b0 = src_argb4444[0] & 0x0f; 1.563 + uint8 g0 = src_argb4444[0] >> 4; 1.564 + uint8 r0 = src_argb4444[1] & 0x0f; 1.565 + uint8 b2 = next_argb4444[0] & 0x0f; 1.566 + uint8 g2 = next_argb4444[0] >> 4; 1.567 + uint8 r2 = next_argb4444[1] & 0x0f; 1.568 + uint8 b = (b0 + b2); // 444 * 2 = 555. 1.569 + uint8 g = (g0 + g2); 1.570 + uint8 r = (r0 + r2); 1.571 + b = (b << 3) | (b >> 2); // 555 -> 888. 1.572 + g = (g << 3) | (g >> 2); 1.573 + r = (r << 3) | (r >> 2); 1.574 + dst_u[0] = RGBToU(r, g, b); 1.575 + dst_v[0] = RGBToV(r, g, b); 1.576 + } 1.577 +} 1.578 + 1.579 +void ARGBToUV444Row_C(const uint8* src_argb, 1.580 + uint8* dst_u, uint8* dst_v, int width) { 1.581 + int x; 1.582 + for (x = 0; x < width; ++x) { 1.583 + uint8 ab = src_argb[0]; 1.584 + uint8 ag = src_argb[1]; 1.585 + uint8 ar = src_argb[2]; 1.586 + dst_u[0] = RGBToU(ar, ag, ab); 1.587 + dst_v[0] = RGBToV(ar, ag, ab); 1.588 + src_argb += 4; 1.589 + dst_u += 1; 1.590 + dst_v += 1; 1.591 + } 1.592 +} 1.593 + 1.594 +void ARGBToUV422Row_C(const uint8* src_argb, 1.595 + uint8* dst_u, uint8* dst_v, int width) { 1.596 + int x; 1.597 + for (x = 0; x < width - 1; x += 2) { 1.598 + uint8 ab = (src_argb[0] + src_argb[4]) >> 1; 1.599 + uint8 ag = (src_argb[1] + src_argb[5]) >> 1; 1.600 + uint8 ar = (src_argb[2] + src_argb[6]) >> 1; 1.601 + dst_u[0] = RGBToU(ar, ag, ab); 1.602 + dst_v[0] = RGBToV(ar, ag, ab); 1.603 + src_argb += 8; 1.604 + dst_u += 1; 1.605 + dst_v += 1; 1.606 + } 1.607 + if (width & 1) { 1.608 + uint8 ab = src_argb[0]; 1.609 + uint8 ag = src_argb[1]; 1.610 + uint8 ar = src_argb[2]; 1.611 + dst_u[0] = RGBToU(ar, ag, ab); 1.612 + dst_v[0] = RGBToV(ar, ag, ab); 1.613 + } 1.614 +} 1.615 + 1.616 +void ARGBToUV411Row_C(const uint8* src_argb, 1.617 + uint8* dst_u, uint8* dst_v, int width) { 1.618 + int x; 1.619 + for (x = 0; x < width - 3; x += 4) { 1.620 + uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2; 1.621 + uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2; 1.622 + uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2; 1.623 + dst_u[0] = RGBToU(ar, ag, ab); 1.624 + dst_v[0] = RGBToV(ar, ag, ab); 1.625 + src_argb += 16; 1.626 + dst_u += 1; 1.627 + dst_v += 1; 1.628 + } 1.629 + if ((width & 3) == 3) { 1.630 + uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8]) / 3; 1.631 + uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9]) / 3; 1.632 + uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10]) / 3; 1.633 + dst_u[0] = RGBToU(ar, ag, ab); 1.634 + dst_v[0] = RGBToV(ar, ag, ab); 1.635 + } else if ((width & 3) == 2) { 1.636 + uint8 ab = (src_argb[0] + src_argb[4]) >> 1; 1.637 + uint8 ag = (src_argb[1] + src_argb[5]) >> 1; 1.638 + uint8 ar = (src_argb[2] + src_argb[6]) >> 1; 1.639 + dst_u[0] = RGBToU(ar, ag, ab); 1.640 + dst_v[0] = RGBToV(ar, ag, ab); 1.641 + } else if ((width & 3) == 1) { 1.642 + uint8 ab = src_argb[0]; 1.643 + uint8 ag = src_argb[1]; 1.644 + uint8 ar = src_argb[2]; 1.645 + dst_u[0] = RGBToU(ar, ag, ab); 1.646 + dst_v[0] = RGBToV(ar, ag, ab); 1.647 + } 1.648 +} 1.649 + 1.650 +void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) { 1.651 + int x; 1.652 + for (x = 0; x < width; ++x) { 1.653 + uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]); 1.654 + dst_argb[2] = dst_argb[1] = dst_argb[0] = y; 1.655 + dst_argb[3] = src_argb[3]; 1.656 + dst_argb += 4; 1.657 + src_argb += 4; 1.658 + } 1.659 +} 1.660 + 1.661 +// Convert a row of image to Sepia tone. 1.662 +void ARGBSepiaRow_C(uint8* dst_argb, int width) { 1.663 + int x; 1.664 + for (x = 0; x < width; ++x) { 1.665 + int b = dst_argb[0]; 1.666 + int g = dst_argb[1]; 1.667 + int r = dst_argb[2]; 1.668 + int sb = (b * 17 + g * 68 + r * 35) >> 7; 1.669 + int sg = (b * 22 + g * 88 + r * 45) >> 7; 1.670 + int sr = (b * 24 + g * 98 + r * 50) >> 7; 1.671 + // b does not over flow. a is preserved from original. 1.672 + dst_argb[0] = sb; 1.673 + dst_argb[1] = clamp255(sg); 1.674 + dst_argb[2] = clamp255(sr); 1.675 + dst_argb += 4; 1.676 + } 1.677 +} 1.678 + 1.679 +// Apply color matrix to a row of image. Matrix is signed. 1.680 +// TODO(fbarchard): Consider adding rounding (+32). 1.681 +void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb, 1.682 + const int8* matrix_argb, int width) { 1.683 + int x; 1.684 + for (x = 0; x < width; ++x) { 1.685 + int b = src_argb[0]; 1.686 + int g = src_argb[1]; 1.687 + int r = src_argb[2]; 1.688 + int a = src_argb[3]; 1.689 + int sb = (b * matrix_argb[0] + g * matrix_argb[1] + 1.690 + r * matrix_argb[2] + a * matrix_argb[3]) >> 6; 1.691 + int sg = (b * matrix_argb[4] + g * matrix_argb[5] + 1.692 + r * matrix_argb[6] + a * matrix_argb[7]) >> 6; 1.693 + int sr = (b * matrix_argb[8] + g * matrix_argb[9] + 1.694 + r * matrix_argb[10] + a * matrix_argb[11]) >> 6; 1.695 + int sa = (b * matrix_argb[12] + g * matrix_argb[13] + 1.696 + r * matrix_argb[14] + a * matrix_argb[15]) >> 6; 1.697 + dst_argb[0] = Clamp(sb); 1.698 + dst_argb[1] = Clamp(sg); 1.699 + dst_argb[2] = Clamp(sr); 1.700 + dst_argb[3] = Clamp(sa); 1.701 + src_argb += 4; 1.702 + dst_argb += 4; 1.703 + } 1.704 +} 1.705 + 1.706 +// Apply color table to a row of image. 1.707 +void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) { 1.708 + int x; 1.709 + for (x = 0; x < width; ++x) { 1.710 + int b = dst_argb[0]; 1.711 + int g = dst_argb[1]; 1.712 + int r = dst_argb[2]; 1.713 + int a = dst_argb[3]; 1.714 + dst_argb[0] = table_argb[b * 4 + 0]; 1.715 + dst_argb[1] = table_argb[g * 4 + 1]; 1.716 + dst_argb[2] = table_argb[r * 4 + 2]; 1.717 + dst_argb[3] = table_argb[a * 4 + 3]; 1.718 + dst_argb += 4; 1.719 + } 1.720 +} 1.721 + 1.722 +// Apply color table to a row of image. 1.723 +void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) { 1.724 + int x; 1.725 + for (x = 0; x < width; ++x) { 1.726 + int b = dst_argb[0]; 1.727 + int g = dst_argb[1]; 1.728 + int r = dst_argb[2]; 1.729 + dst_argb[0] = table_argb[b * 4 + 0]; 1.730 + dst_argb[1] = table_argb[g * 4 + 1]; 1.731 + dst_argb[2] = table_argb[r * 4 + 2]; 1.732 + dst_argb += 4; 1.733 + } 1.734 +} 1.735 + 1.736 +void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size, 1.737 + int interval_offset, int width) { 1.738 + int x; 1.739 + for (x = 0; x < width; ++x) { 1.740 + int b = dst_argb[0]; 1.741 + int g = dst_argb[1]; 1.742 + int r = dst_argb[2]; 1.743 + dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset; 1.744 + dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset; 1.745 + dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset; 1.746 + dst_argb += 4; 1.747 + } 1.748 +} 1.749 + 1.750 +#define REPEAT8(v) (v) | ((v) << 8) 1.751 +#define SHADE(f, v) v * f >> 24 1.752 + 1.753 +void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width, 1.754 + uint32 value) { 1.755 + const uint32 b_scale = REPEAT8(value & 0xff); 1.756 + const uint32 g_scale = REPEAT8((value >> 8) & 0xff); 1.757 + const uint32 r_scale = REPEAT8((value >> 16) & 0xff); 1.758 + const uint32 a_scale = REPEAT8(value >> 24); 1.759 + 1.760 + int i; 1.761 + for (i = 0; i < width; ++i) { 1.762 + const uint32 b = REPEAT8(src_argb[0]); 1.763 + const uint32 g = REPEAT8(src_argb[1]); 1.764 + const uint32 r = REPEAT8(src_argb[2]); 1.765 + const uint32 a = REPEAT8(src_argb[3]); 1.766 + dst_argb[0] = SHADE(b, b_scale); 1.767 + dst_argb[1] = SHADE(g, g_scale); 1.768 + dst_argb[2] = SHADE(r, r_scale); 1.769 + dst_argb[3] = SHADE(a, a_scale); 1.770 + src_argb += 4; 1.771 + dst_argb += 4; 1.772 + } 1.773 +} 1.774 +#undef REPEAT8 1.775 +#undef SHADE 1.776 + 1.777 +#define REPEAT8(v) (v) | ((v) << 8) 1.778 +#define SHADE(f, v) v * f >> 16 1.779 + 1.780 +void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1, 1.781 + uint8* dst_argb, int width) { 1.782 + int i; 1.783 + for (i = 0; i < width; ++i) { 1.784 + const uint32 b = REPEAT8(src_argb0[0]); 1.785 + const uint32 g = REPEAT8(src_argb0[1]); 1.786 + const uint32 r = REPEAT8(src_argb0[2]); 1.787 + const uint32 a = REPEAT8(src_argb0[3]); 1.788 + const uint32 b_scale = src_argb1[0]; 1.789 + const uint32 g_scale = src_argb1[1]; 1.790 + const uint32 r_scale = src_argb1[2]; 1.791 + const uint32 a_scale = src_argb1[3]; 1.792 + dst_argb[0] = SHADE(b, b_scale); 1.793 + dst_argb[1] = SHADE(g, g_scale); 1.794 + dst_argb[2] = SHADE(r, r_scale); 1.795 + dst_argb[3] = SHADE(a, a_scale); 1.796 + src_argb0 += 4; 1.797 + src_argb1 += 4; 1.798 + dst_argb += 4; 1.799 + } 1.800 +} 1.801 +#undef REPEAT8 1.802 +#undef SHADE 1.803 + 1.804 +#define SHADE(f, v) clamp255(v + f) 1.805 + 1.806 +void ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1, 1.807 + uint8* dst_argb, int width) { 1.808 + int i; 1.809 + for (i = 0; i < width; ++i) { 1.810 + const int b = src_argb0[0]; 1.811 + const int g = src_argb0[1]; 1.812 + const int r = src_argb0[2]; 1.813 + const int a = src_argb0[3]; 1.814 + const int b_add = src_argb1[0]; 1.815 + const int g_add = src_argb1[1]; 1.816 + const int r_add = src_argb1[2]; 1.817 + const int a_add = src_argb1[3]; 1.818 + dst_argb[0] = SHADE(b, b_add); 1.819 + dst_argb[1] = SHADE(g, g_add); 1.820 + dst_argb[2] = SHADE(r, r_add); 1.821 + dst_argb[3] = SHADE(a, a_add); 1.822 + src_argb0 += 4; 1.823 + src_argb1 += 4; 1.824 + dst_argb += 4; 1.825 + } 1.826 +} 1.827 +#undef SHADE 1.828 + 1.829 +#define SHADE(f, v) clamp0(f - v) 1.830 + 1.831 +void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1, 1.832 + uint8* dst_argb, int width) { 1.833 + int i; 1.834 + for (i = 0; i < width; ++i) { 1.835 + const int b = src_argb0[0]; 1.836 + const int g = src_argb0[1]; 1.837 + const int r = src_argb0[2]; 1.838 + const int a = src_argb0[3]; 1.839 + const int b_sub = src_argb1[0]; 1.840 + const int g_sub = src_argb1[1]; 1.841 + const int r_sub = src_argb1[2]; 1.842 + const int a_sub = src_argb1[3]; 1.843 + dst_argb[0] = SHADE(b, b_sub); 1.844 + dst_argb[1] = SHADE(g, g_sub); 1.845 + dst_argb[2] = SHADE(r, r_sub); 1.846 + dst_argb[3] = SHADE(a, a_sub); 1.847 + src_argb0 += 4; 1.848 + src_argb1 += 4; 1.849 + dst_argb += 4; 1.850 + } 1.851 +} 1.852 +#undef SHADE 1.853 + 1.854 +// Sobel functions which mimics SSSE3. 1.855 +void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2, 1.856 + uint8* dst_sobelx, int width) { 1.857 + int i; 1.858 + for (i = 0; i < width; ++i) { 1.859 + int a = src_y0[i]; 1.860 + int b = src_y1[i]; 1.861 + int c = src_y2[i]; 1.862 + int a_sub = src_y0[i + 2]; 1.863 + int b_sub = src_y1[i + 2]; 1.864 + int c_sub = src_y2[i + 2]; 1.865 + int a_diff = a - a_sub; 1.866 + int b_diff = b - b_sub; 1.867 + int c_diff = c - c_sub; 1.868 + int sobel = Abs(a_diff + b_diff * 2 + c_diff); 1.869 + dst_sobelx[i] = (uint8)(clamp255(sobel)); 1.870 + } 1.871 +} 1.872 + 1.873 +void SobelYRow_C(const uint8* src_y0, const uint8* src_y1, 1.874 + uint8* dst_sobely, int width) { 1.875 + int i; 1.876 + for (i = 0; i < width; ++i) { 1.877 + int a = src_y0[i + 0]; 1.878 + int b = src_y0[i + 1]; 1.879 + int c = src_y0[i + 2]; 1.880 + int a_sub = src_y1[i + 0]; 1.881 + int b_sub = src_y1[i + 1]; 1.882 + int c_sub = src_y1[i + 2]; 1.883 + int a_diff = a - a_sub; 1.884 + int b_diff = b - b_sub; 1.885 + int c_diff = c - c_sub; 1.886 + int sobel = Abs(a_diff + b_diff * 2 + c_diff); 1.887 + dst_sobely[i] = (uint8)(clamp255(sobel)); 1.888 + } 1.889 +} 1.890 + 1.891 +void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely, 1.892 + uint8* dst_argb, int width) { 1.893 + int i; 1.894 + for (i = 0; i < width; ++i) { 1.895 + int r = src_sobelx[i]; 1.896 + int b = src_sobely[i]; 1.897 + int s = clamp255(r + b); 1.898 + dst_argb[0] = (uint8)(s); 1.899 + dst_argb[1] = (uint8)(s); 1.900 + dst_argb[2] = (uint8)(s); 1.901 + dst_argb[3] = (uint8)(255u); 1.902 + dst_argb += 4; 1.903 + } 1.904 +} 1.905 + 1.906 +void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely, 1.907 + uint8* dst_y, int width) { 1.908 + int i; 1.909 + for (i = 0; i < width; ++i) { 1.910 + int r = src_sobelx[i]; 1.911 + int b = src_sobely[i]; 1.912 + int s = clamp255(r + b); 1.913 + dst_y[i] = (uint8)(s); 1.914 + } 1.915 +} 1.916 + 1.917 +void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely, 1.918 + uint8* dst_argb, int width) { 1.919 + int i; 1.920 + for (i = 0; i < width; ++i) { 1.921 + int r = src_sobelx[i]; 1.922 + int b = src_sobely[i]; 1.923 + int g = clamp255(r + b); 1.924 + dst_argb[0] = (uint8)(b); 1.925 + dst_argb[1] = (uint8)(g); 1.926 + dst_argb[2] = (uint8)(r); 1.927 + dst_argb[3] = (uint8)(255u); 1.928 + dst_argb += 4; 1.929 + } 1.930 +} 1.931 + 1.932 +void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) { 1.933 + // Copy a Y to RGB. 1.934 + int x; 1.935 + for (x = 0; x < width; ++x) { 1.936 + uint8 y = src_y[0]; 1.937 + dst_argb[2] = dst_argb[1] = dst_argb[0] = y; 1.938 + dst_argb[3] = 255u; 1.939 + dst_argb += 4; 1.940 + ++src_y; 1.941 + } 1.942 +} 1.943 + 1.944 +// C reference code that mimics the YUV assembly. 1.945 + 1.946 +#define YG 74 /* (int8)(1.164 * 64 + 0.5) */ 1.947 + 1.948 +#define UB 127 /* min(63,(int8)(2.018 * 64)) */ 1.949 +#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */ 1.950 +#define UR 0 1.951 + 1.952 +#define VB 0 1.953 +#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */ 1.954 +#define VR 102 /* (int8)(1.596 * 64 + 0.5) */ 1.955 + 1.956 +// Bias 1.957 +#define BB UB * 128 + VB * 128 1.958 +#define BG UG * 128 + VG * 128 1.959 +#define BR UR * 128 + VR * 128 1.960 + 1.961 +static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, 1.962 + uint8* b, uint8* g, uint8* r) { 1.963 + int32 y1 = ((int32)(y) - 16) * YG; 1.964 + *b = Clamp((int32)((u * UB + v * VB) - (BB) + y1) >> 6); 1.965 + *g = Clamp((int32)((u * UG + v * VG) - (BG) + y1) >> 6); 1.966 + *r = Clamp((int32)((u * UR + v * VR) - (BR) + y1) >> 6); 1.967 +} 1.968 + 1.969 +#if !defined(LIBYUV_DISABLE_NEON) && \ 1.970 + (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) 1.971 +// C mimic assembly. 1.972 +// TODO(fbarchard): Remove subsampling from Neon. 1.973 +void I444ToARGBRow_C(const uint8* src_y, 1.974 + const uint8* src_u, 1.975 + const uint8* src_v, 1.976 + uint8* rgb_buf, 1.977 + int width) { 1.978 + int x; 1.979 + for (x = 0; x < width - 1; x += 2) { 1.980 + uint8 u = (src_u[0] + src_u[1] + 1) >> 1; 1.981 + uint8 v = (src_v[0] + src_v[1] + 1) >> 1; 1.982 + YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1.983 + rgb_buf[3] = 255; 1.984 + YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1.985 + rgb_buf[7] = 255; 1.986 + src_y += 2; 1.987 + src_u += 2; 1.988 + src_v += 2; 1.989 + rgb_buf += 8; // Advance 2 pixels. 1.990 + } 1.991 + if (width & 1) { 1.992 + YuvPixel(src_y[0], src_u[0], src_v[0], 1.993 + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1.994 + } 1.995 +} 1.996 +#else 1.997 +void I444ToARGBRow_C(const uint8* src_y, 1.998 + const uint8* src_u, 1.999 + const uint8* src_v, 1.1000 + uint8* rgb_buf, 1.1001 + int width) { 1.1002 + int x; 1.1003 + for (x = 0; x < width; ++x) { 1.1004 + YuvPixel(src_y[0], src_u[0], src_v[0], 1.1005 + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1.1006 + rgb_buf[3] = 255; 1.1007 + src_y += 1; 1.1008 + src_u += 1; 1.1009 + src_v += 1; 1.1010 + rgb_buf += 4; // Advance 1 pixel. 1.1011 + } 1.1012 +} 1.1013 +#endif 1.1014 +// Also used for 420 1.1015 +void I422ToARGBRow_C(const uint8* src_y, 1.1016 + const uint8* src_u, 1.1017 + const uint8* src_v, 1.1018 + uint8* rgb_buf, 1.1019 + int width) { 1.1020 + int x; 1.1021 + for (x = 0; x < width - 1; x += 2) { 1.1022 + YuvPixel(src_y[0], src_u[0], src_v[0], 1.1023 + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1.1024 + rgb_buf[3] = 255; 1.1025 + YuvPixel(src_y[1], src_u[0], src_v[0], 1.1026 + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1.1027 + rgb_buf[7] = 255; 1.1028 + src_y += 2; 1.1029 + src_u += 1; 1.1030 + src_v += 1; 1.1031 + rgb_buf += 8; // Advance 2 pixels. 1.1032 + } 1.1033 + if (width & 1) { 1.1034 + YuvPixel(src_y[0], src_u[0], src_v[0], 1.1035 + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1.1036 + rgb_buf[3] = 255; 1.1037 + } 1.1038 +} 1.1039 + 1.1040 +void I422ToRGB24Row_C(const uint8* src_y, 1.1041 + const uint8* src_u, 1.1042 + const uint8* src_v, 1.1043 + uint8* rgb_buf, 1.1044 + int width) { 1.1045 + int x; 1.1046 + for (x = 0; x < width - 1; x += 2) { 1.1047 + YuvPixel(src_y[0], src_u[0], src_v[0], 1.1048 + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1.1049 + YuvPixel(src_y[1], src_u[0], src_v[0], 1.1050 + rgb_buf + 3, rgb_buf + 4, rgb_buf + 5); 1.1051 + src_y += 2; 1.1052 + src_u += 1; 1.1053 + src_v += 1; 1.1054 + rgb_buf += 6; // Advance 2 pixels. 1.1055 + } 1.1056 + if (width & 1) { 1.1057 + YuvPixel(src_y[0], src_u[0], src_v[0], 1.1058 + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1.1059 + } 1.1060 +} 1.1061 + 1.1062 +void I422ToRAWRow_C(const uint8* src_y, 1.1063 + const uint8* src_u, 1.1064 + const uint8* src_v, 1.1065 + uint8* rgb_buf, 1.1066 + int width) { 1.1067 + int x; 1.1068 + for (x = 0; x < width - 1; x += 2) { 1.1069 + YuvPixel(src_y[0], src_u[0], src_v[0], 1.1070 + rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); 1.1071 + YuvPixel(src_y[1], src_u[0], src_v[0], 1.1072 + rgb_buf + 5, rgb_buf + 4, rgb_buf + 3); 1.1073 + src_y += 2; 1.1074 + src_u += 1; 1.1075 + src_v += 1; 1.1076 + rgb_buf += 6; // Advance 2 pixels. 1.1077 + } 1.1078 + if (width & 1) { 1.1079 + YuvPixel(src_y[0], src_u[0], src_v[0], 1.1080 + rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); 1.1081 + } 1.1082 +} 1.1083 + 1.1084 +void I422ToARGB4444Row_C(const uint8* src_y, 1.1085 + const uint8* src_u, 1.1086 + const uint8* src_v, 1.1087 + uint8* dst_argb4444, 1.1088 + int width) { 1.1089 + uint8 b0; 1.1090 + uint8 g0; 1.1091 + uint8 r0; 1.1092 + uint8 b1; 1.1093 + uint8 g1; 1.1094 + uint8 r1; 1.1095 + int x; 1.1096 + for (x = 0; x < width - 1; x += 2) { 1.1097 + YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); 1.1098 + YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); 1.1099 + b0 = b0 >> 4; 1.1100 + g0 = g0 >> 4; 1.1101 + r0 = r0 >> 4; 1.1102 + b1 = b1 >> 4; 1.1103 + g1 = g1 >> 4; 1.1104 + r1 = r1 >> 4; 1.1105 + *(uint32*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 1.1106 + (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000; 1.1107 + src_y += 2; 1.1108 + src_u += 1; 1.1109 + src_v += 1; 1.1110 + dst_argb4444 += 4; // Advance 2 pixels. 1.1111 + } 1.1112 + if (width & 1) { 1.1113 + YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); 1.1114 + b0 = b0 >> 4; 1.1115 + g0 = g0 >> 4; 1.1116 + r0 = r0 >> 4; 1.1117 + *(uint16*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 1.1118 + 0xf000; 1.1119 + } 1.1120 +} 1.1121 + 1.1122 +void I422ToARGB1555Row_C(const uint8* src_y, 1.1123 + const uint8* src_u, 1.1124 + const uint8* src_v, 1.1125 + uint8* dst_argb1555, 1.1126 + int width) { 1.1127 + uint8 b0; 1.1128 + uint8 g0; 1.1129 + uint8 r0; 1.1130 + uint8 b1; 1.1131 + uint8 g1; 1.1132 + uint8 r1; 1.1133 + int x; 1.1134 + for (x = 0; x < width - 1; x += 2) { 1.1135 + YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); 1.1136 + YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); 1.1137 + b0 = b0 >> 3; 1.1138 + g0 = g0 >> 3; 1.1139 + r0 = r0 >> 3; 1.1140 + b1 = b1 >> 3; 1.1141 + g1 = g1 >> 3; 1.1142 + r1 = r1 >> 3; 1.1143 + *(uint32*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 1.1144 + (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000; 1.1145 + src_y += 2; 1.1146 + src_u += 1; 1.1147 + src_v += 1; 1.1148 + dst_argb1555 += 4; // Advance 2 pixels. 1.1149 + } 1.1150 + if (width & 1) { 1.1151 + YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); 1.1152 + b0 = b0 >> 3; 1.1153 + g0 = g0 >> 3; 1.1154 + r0 = r0 >> 3; 1.1155 + *(uint16*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 1.1156 + 0x8000; 1.1157 + } 1.1158 +} 1.1159 + 1.1160 +void I422ToRGB565Row_C(const uint8* src_y, 1.1161 + const uint8* src_u, 1.1162 + const uint8* src_v, 1.1163 + uint8* dst_rgb565, 1.1164 + int width) { 1.1165 + uint8 b0; 1.1166 + uint8 g0; 1.1167 + uint8 r0; 1.1168 + uint8 b1; 1.1169 + uint8 g1; 1.1170 + uint8 r1; 1.1171 + int x; 1.1172 + for (x = 0; x < width - 1; x += 2) { 1.1173 + YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); 1.1174 + YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); 1.1175 + b0 = b0 >> 3; 1.1176 + g0 = g0 >> 2; 1.1177 + r0 = r0 >> 3; 1.1178 + b1 = b1 >> 3; 1.1179 + g1 = g1 >> 2; 1.1180 + r1 = r1 >> 3; 1.1181 + *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | 1.1182 + (b1 << 16) | (g1 << 21) | (r1 << 27); 1.1183 + src_y += 2; 1.1184 + src_u += 1; 1.1185 + src_v += 1; 1.1186 + dst_rgb565 += 4; // Advance 2 pixels. 1.1187 + } 1.1188 + if (width & 1) { 1.1189 + YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); 1.1190 + b0 = b0 >> 3; 1.1191 + g0 = g0 >> 2; 1.1192 + r0 = r0 >> 3; 1.1193 + *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); 1.1194 + } 1.1195 +} 1.1196 + 1.1197 +void I411ToARGBRow_C(const uint8* src_y, 1.1198 + const uint8* src_u, 1.1199 + const uint8* src_v, 1.1200 + uint8* rgb_buf, 1.1201 + int width) { 1.1202 + int x; 1.1203 + for (x = 0; x < width - 3; x += 4) { 1.1204 + YuvPixel(src_y[0], src_u[0], src_v[0], 1.1205 + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1.1206 + rgb_buf[3] = 255; 1.1207 + YuvPixel(src_y[1], src_u[0], src_v[0], 1.1208 + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1.1209 + rgb_buf[7] = 255; 1.1210 + YuvPixel(src_y[2], src_u[0], src_v[0], 1.1211 + rgb_buf + 8, rgb_buf + 9, rgb_buf + 10); 1.1212 + rgb_buf[11] = 255; 1.1213 + YuvPixel(src_y[3], src_u[0], src_v[0], 1.1214 + rgb_buf + 12, rgb_buf + 13, rgb_buf + 14); 1.1215 + rgb_buf[15] = 255; 1.1216 + src_y += 4; 1.1217 + src_u += 1; 1.1218 + src_v += 1; 1.1219 + rgb_buf += 16; // Advance 4 pixels. 1.1220 + } 1.1221 + if (width & 2) { 1.1222 + YuvPixel(src_y[0], src_u[0], src_v[0], 1.1223 + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1.1224 + rgb_buf[3] = 255; 1.1225 + YuvPixel(src_y[1], src_u[0], src_v[0], 1.1226 + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1.1227 + rgb_buf[7] = 255; 1.1228 + src_y += 2; 1.1229 + rgb_buf += 8; // Advance 2 pixels. 1.1230 + } 1.1231 + if (width & 1) { 1.1232 + YuvPixel(src_y[0], src_u[0], src_v[0], 1.1233 + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1.1234 + rgb_buf[3] = 255; 1.1235 + } 1.1236 +} 1.1237 + 1.1238 +void NV12ToARGBRow_C(const uint8* src_y, 1.1239 + const uint8* usrc_v, 1.1240 + uint8* rgb_buf, 1.1241 + int width) { 1.1242 + int x; 1.1243 + for (x = 0; x < width - 1; x += 2) { 1.1244 + YuvPixel(src_y[0], usrc_v[0], usrc_v[1], 1.1245 + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1.1246 + rgb_buf[3] = 255; 1.1247 + YuvPixel(src_y[1], usrc_v[0], usrc_v[1], 1.1248 + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1.1249 + rgb_buf[7] = 255; 1.1250 + src_y += 2; 1.1251 + usrc_v += 2; 1.1252 + rgb_buf += 8; // Advance 2 pixels. 1.1253 + } 1.1254 + if (width & 1) { 1.1255 + YuvPixel(src_y[0], usrc_v[0], usrc_v[1], 1.1256 + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1.1257 + rgb_buf[3] = 255; 1.1258 + } 1.1259 +} 1.1260 + 1.1261 +void NV21ToARGBRow_C(const uint8* src_y, 1.1262 + const uint8* src_vu, 1.1263 + uint8* rgb_buf, 1.1264 + int width) { 1.1265 + int x; 1.1266 + for (x = 0; x < width - 1; x += 2) { 1.1267 + YuvPixel(src_y[0], src_vu[1], src_vu[0], 1.1268 + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1.1269 + rgb_buf[3] = 255; 1.1270 + 1.1271 + YuvPixel(src_y[1], src_vu[1], src_vu[0], 1.1272 + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1.1273 + rgb_buf[7] = 255; 1.1274 + 1.1275 + src_y += 2; 1.1276 + src_vu += 2; 1.1277 + rgb_buf += 8; // Advance 2 pixels. 1.1278 + } 1.1279 + if (width & 1) { 1.1280 + YuvPixel(src_y[0], src_vu[1], src_vu[0], 1.1281 + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1.1282 + rgb_buf[3] = 255; 1.1283 + } 1.1284 +} 1.1285 + 1.1286 +void NV12ToRGB565Row_C(const uint8* src_y, 1.1287 + const uint8* usrc_v, 1.1288 + uint8* dst_rgb565, 1.1289 + int width) { 1.1290 + uint8 b0; 1.1291 + uint8 g0; 1.1292 + uint8 r0; 1.1293 + uint8 b1; 1.1294 + uint8 g1; 1.1295 + uint8 r1; 1.1296 + int x; 1.1297 + for (x = 0; x < width - 1; x += 2) { 1.1298 + YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0); 1.1299 + YuvPixel(src_y[1], usrc_v[0], usrc_v[1], &b1, &g1, &r1); 1.1300 + b0 = b0 >> 3; 1.1301 + g0 = g0 >> 2; 1.1302 + r0 = r0 >> 3; 1.1303 + b1 = b1 >> 3; 1.1304 + g1 = g1 >> 2; 1.1305 + r1 = r1 >> 3; 1.1306 + *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | 1.1307 + (b1 << 16) | (g1 << 21) | (r1 << 27); 1.1308 + src_y += 2; 1.1309 + usrc_v += 2; 1.1310 + dst_rgb565 += 4; // Advance 2 pixels. 1.1311 + } 1.1312 + if (width & 1) { 1.1313 + YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0); 1.1314 + b0 = b0 >> 3; 1.1315 + g0 = g0 >> 2; 1.1316 + r0 = r0 >> 3; 1.1317 + *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); 1.1318 + } 1.1319 +} 1.1320 + 1.1321 +void NV21ToRGB565Row_C(const uint8* src_y, 1.1322 + const uint8* vsrc_u, 1.1323 + uint8* dst_rgb565, 1.1324 + int width) { 1.1325 + uint8 b0; 1.1326 + uint8 g0; 1.1327 + uint8 r0; 1.1328 + uint8 b1; 1.1329 + uint8 g1; 1.1330 + uint8 r1; 1.1331 + int x; 1.1332 + for (x = 0; x < width - 1; x += 2) { 1.1333 + YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0); 1.1334 + YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1); 1.1335 + b0 = b0 >> 3; 1.1336 + g0 = g0 >> 2; 1.1337 + r0 = r0 >> 3; 1.1338 + b1 = b1 >> 3; 1.1339 + g1 = g1 >> 2; 1.1340 + r1 = r1 >> 3; 1.1341 + *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | 1.1342 + (b1 << 16) | (g1 << 21) | (r1 << 27); 1.1343 + src_y += 2; 1.1344 + vsrc_u += 2; 1.1345 + dst_rgb565 += 4; // Advance 2 pixels. 1.1346 + } 1.1347 + if (width & 1) { 1.1348 + YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0); 1.1349 + b0 = b0 >> 3; 1.1350 + g0 = g0 >> 2; 1.1351 + r0 = r0 >> 3; 1.1352 + *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); 1.1353 + } 1.1354 +} 1.1355 + 1.1356 +void YUY2ToARGBRow_C(const uint8* src_yuy2, 1.1357 + uint8* rgb_buf, 1.1358 + int width) { 1.1359 + int x; 1.1360 + for (x = 0; x < width - 1; x += 2) { 1.1361 + YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], 1.1362 + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1.1363 + rgb_buf[3] = 255; 1.1364 + YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], 1.1365 + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1.1366 + rgb_buf[7] = 255; 1.1367 + src_yuy2 += 4; 1.1368 + rgb_buf += 8; // Advance 2 pixels. 1.1369 + } 1.1370 + if (width & 1) { 1.1371 + YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], 1.1372 + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1.1373 + rgb_buf[3] = 255; 1.1374 + } 1.1375 +} 1.1376 + 1.1377 +void UYVYToARGBRow_C(const uint8* src_uyvy, 1.1378 + uint8* rgb_buf, 1.1379 + int width) { 1.1380 + int x; 1.1381 + for (x = 0; x < width - 1; x += 2) { 1.1382 + YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], 1.1383 + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1.1384 + rgb_buf[3] = 255; 1.1385 + YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], 1.1386 + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1.1387 + rgb_buf[7] = 255; 1.1388 + src_uyvy += 4; 1.1389 + rgb_buf += 8; // Advance 2 pixels. 1.1390 + } 1.1391 + if (width & 1) { 1.1392 + YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], 1.1393 + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1.1394 + rgb_buf[3] = 255; 1.1395 + } 1.1396 +} 1.1397 + 1.1398 +void I422ToBGRARow_C(const uint8* src_y, 1.1399 + const uint8* src_u, 1.1400 + const uint8* src_v, 1.1401 + uint8* rgb_buf, 1.1402 + int width) { 1.1403 + int x; 1.1404 + for (x = 0; x < width - 1; x += 2) { 1.1405 + YuvPixel(src_y[0], src_u[0], src_v[0], 1.1406 + rgb_buf + 3, rgb_buf + 2, rgb_buf + 1); 1.1407 + rgb_buf[0] = 255; 1.1408 + YuvPixel(src_y[1], src_u[0], src_v[0], 1.1409 + rgb_buf + 7, rgb_buf + 6, rgb_buf + 5); 1.1410 + rgb_buf[4] = 255; 1.1411 + src_y += 2; 1.1412 + src_u += 1; 1.1413 + src_v += 1; 1.1414 + rgb_buf += 8; // Advance 2 pixels. 1.1415 + } 1.1416 + if (width & 1) { 1.1417 + YuvPixel(src_y[0], src_u[0], src_v[0], 1.1418 + rgb_buf + 3, rgb_buf + 2, rgb_buf + 1); 1.1419 + rgb_buf[0] = 255; 1.1420 + } 1.1421 +} 1.1422 + 1.1423 +void I422ToABGRRow_C(const uint8* src_y, 1.1424 + const uint8* src_u, 1.1425 + const uint8* src_v, 1.1426 + uint8* rgb_buf, 1.1427 + int width) { 1.1428 + int x; 1.1429 + for (x = 0; x < width - 1; x += 2) { 1.1430 + YuvPixel(src_y[0], src_u[0], src_v[0], 1.1431 + rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); 1.1432 + rgb_buf[3] = 255; 1.1433 + YuvPixel(src_y[1], src_u[0], src_v[0], 1.1434 + rgb_buf + 6, rgb_buf + 5, rgb_buf + 4); 1.1435 + rgb_buf[7] = 255; 1.1436 + src_y += 2; 1.1437 + src_u += 1; 1.1438 + src_v += 1; 1.1439 + rgb_buf += 8; // Advance 2 pixels. 1.1440 + } 1.1441 + if (width & 1) { 1.1442 + YuvPixel(src_y[0], src_u[0], src_v[0], 1.1443 + rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); 1.1444 + rgb_buf[3] = 255; 1.1445 + } 1.1446 +} 1.1447 + 1.1448 +void I422ToRGBARow_C(const uint8* src_y, 1.1449 + const uint8* src_u, 1.1450 + const uint8* src_v, 1.1451 + uint8* rgb_buf, 1.1452 + int width) { 1.1453 + int x; 1.1454 + for (x = 0; x < width - 1; x += 2) { 1.1455 + YuvPixel(src_y[0], src_u[0], src_v[0], 1.1456 + rgb_buf + 1, rgb_buf + 2, rgb_buf + 3); 1.1457 + rgb_buf[0] = 255; 1.1458 + YuvPixel(src_y[1], src_u[0], src_v[0], 1.1459 + rgb_buf + 5, rgb_buf + 6, rgb_buf + 7); 1.1460 + rgb_buf[4] = 255; 1.1461 + src_y += 2; 1.1462 + src_u += 1; 1.1463 + src_v += 1; 1.1464 + rgb_buf += 8; // Advance 2 pixels. 1.1465 + } 1.1466 + if (width & 1) { 1.1467 + YuvPixel(src_y[0], src_u[0], src_v[0], 1.1468 + rgb_buf + 1, rgb_buf + 2, rgb_buf + 3); 1.1469 + rgb_buf[0] = 255; 1.1470 + } 1.1471 +} 1.1472 + 1.1473 +void YToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) { 1.1474 + int x; 1.1475 + for (x = 0; x < width - 1; x += 2) { 1.1476 + YuvPixel(src_y[0], 128, 128, 1.1477 + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1.1478 + rgb_buf[3] = 255; 1.1479 + YuvPixel(src_y[1], 128, 128, 1.1480 + rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); 1.1481 + rgb_buf[7] = 255; 1.1482 + src_y += 2; 1.1483 + rgb_buf += 8; // Advance 2 pixels. 1.1484 + } 1.1485 + if (width & 1) { 1.1486 + YuvPixel(src_y[0], 128, 128, 1.1487 + rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); 1.1488 + rgb_buf[3] = 255; 1.1489 + } 1.1490 +} 1.1491 + 1.1492 +void MirrorRow_C(const uint8* src, uint8* dst, int width) { 1.1493 + int x; 1.1494 + src += width - 1; 1.1495 + for (x = 0; x < width - 1; x += 2) { 1.1496 + dst[x] = src[0]; 1.1497 + dst[x + 1] = src[-1]; 1.1498 + src -= 2; 1.1499 + } 1.1500 + if (width & 1) { 1.1501 + dst[width - 1] = src[0]; 1.1502 + } 1.1503 +} 1.1504 + 1.1505 +void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { 1.1506 + int x; 1.1507 + src_uv += (width - 1) << 1; 1.1508 + for (x = 0; x < width - 1; x += 2) { 1.1509 + dst_u[x] = src_uv[0]; 1.1510 + dst_u[x + 1] = src_uv[-2]; 1.1511 + dst_v[x] = src_uv[1]; 1.1512 + dst_v[x + 1] = src_uv[-2 + 1]; 1.1513 + src_uv -= 4; 1.1514 + } 1.1515 + if (width & 1) { 1.1516 + dst_u[width - 1] = src_uv[0]; 1.1517 + dst_v[width - 1] = src_uv[1]; 1.1518 + } 1.1519 +} 1.1520 + 1.1521 +void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) { 1.1522 + int x; 1.1523 + const uint32* src32 = (const uint32*)(src); 1.1524 + uint32* dst32 = (uint32*)(dst); 1.1525 + src32 += width - 1; 1.1526 + for (x = 0; x < width - 1; x += 2) { 1.1527 + dst32[x] = src32[0]; 1.1528 + dst32[x + 1] = src32[-1]; 1.1529 + src32 -= 2; 1.1530 + } 1.1531 + if (width & 1) { 1.1532 + dst32[width - 1] = src32[0]; 1.1533 + } 1.1534 +} 1.1535 + 1.1536 +void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { 1.1537 + int x; 1.1538 + for (x = 0; x < width - 1; x += 2) { 1.1539 + dst_u[x] = src_uv[0]; 1.1540 + dst_u[x + 1] = src_uv[2]; 1.1541 + dst_v[x] = src_uv[1]; 1.1542 + dst_v[x + 1] = src_uv[3]; 1.1543 + src_uv += 4; 1.1544 + } 1.1545 + if (width & 1) { 1.1546 + dst_u[width - 1] = src_uv[0]; 1.1547 + dst_v[width - 1] = src_uv[1]; 1.1548 + } 1.1549 +} 1.1550 + 1.1551 +void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv, 1.1552 + int width) { 1.1553 + int x; 1.1554 + for (x = 0; x < width - 1; x += 2) { 1.1555 + dst_uv[0] = src_u[x]; 1.1556 + dst_uv[1] = src_v[x]; 1.1557 + dst_uv[2] = src_u[x + 1]; 1.1558 + dst_uv[3] = src_v[x + 1]; 1.1559 + dst_uv += 4; 1.1560 + } 1.1561 + if (width & 1) { 1.1562 + dst_uv[0] = src_u[width - 1]; 1.1563 + dst_uv[1] = src_v[width - 1]; 1.1564 + } 1.1565 +} 1.1566 + 1.1567 +void CopyRow_C(const uint8* src, uint8* dst, int count) { 1.1568 + memcpy(dst, src, count); 1.1569 +} 1.1570 + 1.1571 +void SetRow_C(uint8* dst, uint32 v8, int count) { 1.1572 +#ifdef _MSC_VER 1.1573 + // VC will generate rep stosb. 1.1574 + int x; 1.1575 + for (x = 0; x < count; ++x) { 1.1576 + dst[x] = v8; 1.1577 + } 1.1578 +#else 1.1579 + memset(dst, v8, count); 1.1580 +#endif 1.1581 +} 1.1582 + 1.1583 +void ARGBSetRows_C(uint8* dst, uint32 v32, int width, 1.1584 + int dst_stride, int height) { 1.1585 + int y; 1.1586 + for (y = 0; y < height; ++y) { 1.1587 + uint32* d = (uint32*)(dst); 1.1588 + int x; 1.1589 + for (x = 0; x < width; ++x) { 1.1590 + d[x] = v32; 1.1591 + } 1.1592 + dst += dst_stride; 1.1593 + } 1.1594 +} 1.1595 + 1.1596 +// Filter 2 rows of YUY2 UV's (422) into U and V (420). 1.1597 +void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2, 1.1598 + uint8* dst_u, uint8* dst_v, int width) { 1.1599 + // Output a row of UV values, filtering 2 rows of YUY2. 1.1600 + int x; 1.1601 + for (x = 0; x < width; x += 2) { 1.1602 + dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1; 1.1603 + dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1; 1.1604 + src_yuy2 += 4; 1.1605 + dst_u += 1; 1.1606 + dst_v += 1; 1.1607 + } 1.1608 +} 1.1609 + 1.1610 +// Copy row of YUY2 UV's (422) into U and V (422). 1.1611 +void YUY2ToUV422Row_C(const uint8* src_yuy2, 1.1612 + uint8* dst_u, uint8* dst_v, int width) { 1.1613 + // Output a row of UV values. 1.1614 + int x; 1.1615 + for (x = 0; x < width; x += 2) { 1.1616 + dst_u[0] = src_yuy2[1]; 1.1617 + dst_v[0] = src_yuy2[3]; 1.1618 + src_yuy2 += 4; 1.1619 + dst_u += 1; 1.1620 + dst_v += 1; 1.1621 + } 1.1622 +} 1.1623 + 1.1624 +// Copy row of YUY2 Y's (422) into Y (420/422). 1.1625 +void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) { 1.1626 + // Output a row of Y values. 1.1627 + int x; 1.1628 + for (x = 0; x < width - 1; x += 2) { 1.1629 + dst_y[x] = src_yuy2[0]; 1.1630 + dst_y[x + 1] = src_yuy2[2]; 1.1631 + src_yuy2 += 4; 1.1632 + } 1.1633 + if (width & 1) { 1.1634 + dst_y[width - 1] = src_yuy2[0]; 1.1635 + } 1.1636 +} 1.1637 + 1.1638 +// Filter 2 rows of UYVY UV's (422) into U and V (420). 1.1639 +void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy, 1.1640 + uint8* dst_u, uint8* dst_v, int width) { 1.1641 + // Output a row of UV values. 1.1642 + int x; 1.1643 + for (x = 0; x < width; x += 2) { 1.1644 + dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1; 1.1645 + dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1; 1.1646 + src_uyvy += 4; 1.1647 + dst_u += 1; 1.1648 + dst_v += 1; 1.1649 + } 1.1650 +} 1.1651 + 1.1652 +// Copy row of UYVY UV's (422) into U and V (422). 1.1653 +void UYVYToUV422Row_C(const uint8* src_uyvy, 1.1654 + uint8* dst_u, uint8* dst_v, int width) { 1.1655 + // Output a row of UV values. 1.1656 + int x; 1.1657 + for (x = 0; x < width; x += 2) { 1.1658 + dst_u[0] = src_uyvy[0]; 1.1659 + dst_v[0] = src_uyvy[2]; 1.1660 + src_uyvy += 4; 1.1661 + dst_u += 1; 1.1662 + dst_v += 1; 1.1663 + } 1.1664 +} 1.1665 + 1.1666 +// Copy row of UYVY Y's (422) into Y (420/422). 1.1667 +void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) { 1.1668 + // Output a row of Y values. 1.1669 + int x; 1.1670 + for (x = 0; x < width - 1; x += 2) { 1.1671 + dst_y[x] = src_uyvy[1]; 1.1672 + dst_y[x + 1] = src_uyvy[3]; 1.1673 + src_uyvy += 4; 1.1674 + } 1.1675 + if (width & 1) { 1.1676 + dst_y[width - 1] = src_uyvy[1]; 1.1677 + } 1.1678 +} 1.1679 + 1.1680 +#define BLEND(f, b, a) (((256 - a) * b) >> 8) + f 1.1681 + 1.1682 +// Blend src_argb0 over src_argb1 and store to dst_argb. 1.1683 +// dst_argb may be src_argb0 or src_argb1. 1.1684 +// This code mimics the SSSE3 version for better testability. 1.1685 +void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1, 1.1686 + uint8* dst_argb, int width) { 1.1687 + int x; 1.1688 + for (x = 0; x < width - 1; x += 2) { 1.1689 + uint32 fb = src_argb0[0]; 1.1690 + uint32 fg = src_argb0[1]; 1.1691 + uint32 fr = src_argb0[2]; 1.1692 + uint32 a = src_argb0[3]; 1.1693 + uint32 bb = src_argb1[0]; 1.1694 + uint32 bg = src_argb1[1]; 1.1695 + uint32 br = src_argb1[2]; 1.1696 + dst_argb[0] = BLEND(fb, bb, a); 1.1697 + dst_argb[1] = BLEND(fg, bg, a); 1.1698 + dst_argb[2] = BLEND(fr, br, a); 1.1699 + dst_argb[3] = 255u; 1.1700 + 1.1701 + fb = src_argb0[4 + 0]; 1.1702 + fg = src_argb0[4 + 1]; 1.1703 + fr = src_argb0[4 + 2]; 1.1704 + a = src_argb0[4 + 3]; 1.1705 + bb = src_argb1[4 + 0]; 1.1706 + bg = src_argb1[4 + 1]; 1.1707 + br = src_argb1[4 + 2]; 1.1708 + dst_argb[4 + 0] = BLEND(fb, bb, a); 1.1709 + dst_argb[4 + 1] = BLEND(fg, bg, a); 1.1710 + dst_argb[4 + 2] = BLEND(fr, br, a); 1.1711 + dst_argb[4 + 3] = 255u; 1.1712 + src_argb0 += 8; 1.1713 + src_argb1 += 8; 1.1714 + dst_argb += 8; 1.1715 + } 1.1716 + 1.1717 + if (width & 1) { 1.1718 + uint32 fb = src_argb0[0]; 1.1719 + uint32 fg = src_argb0[1]; 1.1720 + uint32 fr = src_argb0[2]; 1.1721 + uint32 a = src_argb0[3]; 1.1722 + uint32 bb = src_argb1[0]; 1.1723 + uint32 bg = src_argb1[1]; 1.1724 + uint32 br = src_argb1[2]; 1.1725 + dst_argb[0] = BLEND(fb, bb, a); 1.1726 + dst_argb[1] = BLEND(fg, bg, a); 1.1727 + dst_argb[2] = BLEND(fr, br, a); 1.1728 + dst_argb[3] = 255u; 1.1729 + } 1.1730 +} 1.1731 +#undef BLEND 1.1732 +#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24 1.1733 + 1.1734 +// Multiply source RGB by alpha and store to destination. 1.1735 +// This code mimics the SSSE3 version for better testability. 1.1736 +void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { 1.1737 + int i; 1.1738 + for (i = 0; i < width - 1; i += 2) { 1.1739 + uint32 b = src_argb[0]; 1.1740 + uint32 g = src_argb[1]; 1.1741 + uint32 r = src_argb[2]; 1.1742 + uint32 a = src_argb[3]; 1.1743 + dst_argb[0] = ATTENUATE(b, a); 1.1744 + dst_argb[1] = ATTENUATE(g, a); 1.1745 + dst_argb[2] = ATTENUATE(r, a); 1.1746 + dst_argb[3] = a; 1.1747 + b = src_argb[4]; 1.1748 + g = src_argb[5]; 1.1749 + r = src_argb[6]; 1.1750 + a = src_argb[7]; 1.1751 + dst_argb[4] = ATTENUATE(b, a); 1.1752 + dst_argb[5] = ATTENUATE(g, a); 1.1753 + dst_argb[6] = ATTENUATE(r, a); 1.1754 + dst_argb[7] = a; 1.1755 + src_argb += 8; 1.1756 + dst_argb += 8; 1.1757 + } 1.1758 + 1.1759 + if (width & 1) { 1.1760 + const uint32 b = src_argb[0]; 1.1761 + const uint32 g = src_argb[1]; 1.1762 + const uint32 r = src_argb[2]; 1.1763 + const uint32 a = src_argb[3]; 1.1764 + dst_argb[0] = ATTENUATE(b, a); 1.1765 + dst_argb[1] = ATTENUATE(g, a); 1.1766 + dst_argb[2] = ATTENUATE(r, a); 1.1767 + dst_argb[3] = a; 1.1768 + } 1.1769 +} 1.1770 +#undef ATTENUATE 1.1771 + 1.1772 +// Divide source RGB by alpha and store to destination. 1.1773 +// b = (b * 255 + (a / 2)) / a; 1.1774 +// g = (g * 255 + (a / 2)) / a; 1.1775 +// r = (r * 255 + (a / 2)) / a; 1.1776 +// Reciprocal method is off by 1 on some values. ie 125 1.1777 +// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower. 1.1778 +#define T(a) 0x01000000 + (0x10000 / a) 1.1779 +const uint32 fixed_invtbl8[256] = { 1.1780 + 0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07), 1.1781 + T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f), 1.1782 + T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17), 1.1783 + T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f), 1.1784 + T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27), 1.1785 + T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f), 1.1786 + T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37), 1.1787 + T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f), 1.1788 + T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47), 1.1789 + T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f), 1.1790 + T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57), 1.1791 + T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f), 1.1792 + T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67), 1.1793 + T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f), 1.1794 + T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77), 1.1795 + T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f), 1.1796 + T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87), 1.1797 + T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f), 1.1798 + T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97), 1.1799 + T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f), 1.1800 + T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7), 1.1801 + T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf), 1.1802 + T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7), 1.1803 + T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf), 1.1804 + T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7), 1.1805 + T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf), 1.1806 + T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7), 1.1807 + T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf), 1.1808 + T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7), 1.1809 + T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef), 1.1810 + T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7), 1.1811 + T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x01000100 }; 1.1812 +#undef T 1.1813 + 1.1814 +void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { 1.1815 + int i; 1.1816 + for (i = 0; i < width; ++i) { 1.1817 + uint32 b = src_argb[0]; 1.1818 + uint32 g = src_argb[1]; 1.1819 + uint32 r = src_argb[2]; 1.1820 + const uint32 a = src_argb[3]; 1.1821 + const uint32 ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point 1.1822 + b = (b * ia) >> 8; 1.1823 + g = (g * ia) >> 8; 1.1824 + r = (r * ia) >> 8; 1.1825 + // Clamping should not be necessary but is free in assembly. 1.1826 + dst_argb[0] = clamp255(b); 1.1827 + dst_argb[1] = clamp255(g); 1.1828 + dst_argb[2] = clamp255(r); 1.1829 + dst_argb[3] = a; 1.1830 + src_argb += 4; 1.1831 + dst_argb += 4; 1.1832 + } 1.1833 +} 1.1834 + 1.1835 +void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum, 1.1836 + const int32* previous_cumsum, int width) { 1.1837 + int32 row_sum[4] = {0, 0, 0, 0}; 1.1838 + int x; 1.1839 + for (x = 0; x < width; ++x) { 1.1840 + row_sum[0] += row[x * 4 + 0]; 1.1841 + row_sum[1] += row[x * 4 + 1]; 1.1842 + row_sum[2] += row[x * 4 + 2]; 1.1843 + row_sum[3] += row[x * 4 + 3]; 1.1844 + cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0]; 1.1845 + cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1]; 1.1846 + cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2]; 1.1847 + cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3]; 1.1848 + } 1.1849 +} 1.1850 + 1.1851 +void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl, 1.1852 + int w, int area, uint8* dst, int count) { 1.1853 + float ooa = 1.0f / area; 1.1854 + int i; 1.1855 + for (i = 0; i < count; ++i) { 1.1856 + dst[0] = (uint8)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa); 1.1857 + dst[1] = (uint8)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa); 1.1858 + dst[2] = (uint8)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa); 1.1859 + dst[3] = (uint8)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa); 1.1860 + dst += 4; 1.1861 + tl += 4; 1.1862 + bl += 4; 1.1863 + } 1.1864 +} 1.1865 + 1.1866 +// Copy pixels from rotated source to destination row with a slope. 1.1867 +LIBYUV_API 1.1868 +void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, 1.1869 + uint8* dst_argb, const float* uv_dudv, int width) { 1.1870 + int i; 1.1871 + // Render a row of pixels from source into a buffer. 1.1872 + float uv[2]; 1.1873 + uv[0] = uv_dudv[0]; 1.1874 + uv[1] = uv_dudv[1]; 1.1875 + for (i = 0; i < width; ++i) { 1.1876 + int x = (int)(uv[0]); 1.1877 + int y = (int)(uv[1]); 1.1878 + *(uint32*)(dst_argb) = 1.1879 + *(const uint32*)(src_argb + y * src_argb_stride + 1.1880 + x * 4); 1.1881 + dst_argb += 4; 1.1882 + uv[0] += uv_dudv[2]; 1.1883 + uv[1] += uv_dudv[3]; 1.1884 + } 1.1885 +} 1.1886 + 1.1887 +// Blend 2 rows into 1 for conversions such as I422ToI420. 1.1888 +void HalfRow_C(const uint8* src_uv, int src_uv_stride, 1.1889 + uint8* dst_uv, int pix) { 1.1890 + int x; 1.1891 + for (x = 0; x < pix; ++x) { 1.1892 + dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1; 1.1893 + } 1.1894 +} 1.1895 + 1.1896 +// C version 2x2 -> 2x1. 1.1897 +void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, 1.1898 + ptrdiff_t src_stride, 1.1899 + int width, int source_y_fraction) { 1.1900 + int y1_fraction = source_y_fraction; 1.1901 + int y0_fraction = 256 - y1_fraction; 1.1902 + const uint8* src_ptr1 = src_ptr + src_stride; 1.1903 + int x; 1.1904 + if (source_y_fraction == 0) { 1.1905 + memcpy(dst_ptr, src_ptr, width); 1.1906 + return; 1.1907 + } 1.1908 + if (source_y_fraction == 128) { 1.1909 + HalfRow_C(src_ptr, (int)(src_stride), dst_ptr, width); 1.1910 + return; 1.1911 + } 1.1912 + for (x = 0; x < width - 1; x += 2) { 1.1913 + dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8; 1.1914 + dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8; 1.1915 + src_ptr += 2; 1.1916 + src_ptr1 += 2; 1.1917 + dst_ptr += 2; 1.1918 + } 1.1919 + if (width & 1) { 1.1920 + dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8; 1.1921 + } 1.1922 +} 1.1923 + 1.1924 +// Select 2 channels from ARGB on alternating pixels. e.g. BGBGBGBG 1.1925 +void ARGBToBayerRow_C(const uint8* src_argb, 1.1926 + uint8* dst_bayer, uint32 selector, int pix) { 1.1927 + int index0 = selector & 0xff; 1.1928 + int index1 = (selector >> 8) & 0xff; 1.1929 + // Copy a row of Bayer. 1.1930 + int x; 1.1931 + for (x = 0; x < pix - 1; x += 2) { 1.1932 + dst_bayer[0] = src_argb[index0]; 1.1933 + dst_bayer[1] = src_argb[index1]; 1.1934 + src_argb += 8; 1.1935 + dst_bayer += 2; 1.1936 + } 1.1937 + if (pix & 1) { 1.1938 + dst_bayer[0] = src_argb[index0]; 1.1939 + } 1.1940 +} 1.1941 + 1.1942 +// Select G channel from ARGB. e.g. GGGGGGGG 1.1943 +void ARGBToBayerGGRow_C(const uint8* src_argb, 1.1944 + uint8* dst_bayer, uint32 selector, int pix) { 1.1945 + // Copy a row of G. 1.1946 + int x; 1.1947 + for (x = 0; x < pix - 1; x += 2) { 1.1948 + dst_bayer[0] = src_argb[1]; 1.1949 + dst_bayer[1] = src_argb[5]; 1.1950 + src_argb += 8; 1.1951 + dst_bayer += 2; 1.1952 + } 1.1953 + if (pix & 1) { 1.1954 + dst_bayer[0] = src_argb[1]; 1.1955 + } 1.1956 +} 1.1957 + 1.1958 +// Use first 4 shuffler values to reorder ARGB channels. 1.1959 +void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb, 1.1960 + const uint8* shuffler, int pix) { 1.1961 + int index0 = shuffler[0]; 1.1962 + int index1 = shuffler[1]; 1.1963 + int index2 = shuffler[2]; 1.1964 + int index3 = shuffler[3]; 1.1965 + // Shuffle a row of ARGB. 1.1966 + int x; 1.1967 + for (x = 0; x < pix; ++x) { 1.1968 + // To support in-place conversion. 1.1969 + uint8 b = src_argb[index0]; 1.1970 + uint8 g = src_argb[index1]; 1.1971 + uint8 r = src_argb[index2]; 1.1972 + uint8 a = src_argb[index3]; 1.1973 + dst_argb[0] = b; 1.1974 + dst_argb[1] = g; 1.1975 + dst_argb[2] = r; 1.1976 + dst_argb[3] = a; 1.1977 + src_argb += 4; 1.1978 + dst_argb += 4; 1.1979 + } 1.1980 +} 1.1981 + 1.1982 +void I422ToYUY2Row_C(const uint8* src_y, 1.1983 + const uint8* src_u, 1.1984 + const uint8* src_v, 1.1985 + uint8* dst_frame, int width) { 1.1986 + int x; 1.1987 + for (x = 0; x < width - 1; x += 2) { 1.1988 + dst_frame[0] = src_y[0]; 1.1989 + dst_frame[1] = src_u[0]; 1.1990 + dst_frame[2] = src_y[1]; 1.1991 + dst_frame[3] = src_v[0]; 1.1992 + dst_frame += 4; 1.1993 + src_y += 2; 1.1994 + src_u += 1; 1.1995 + src_v += 1; 1.1996 + } 1.1997 + if (width & 1) { 1.1998 + dst_frame[0] = src_y[0]; 1.1999 + dst_frame[1] = src_u[0]; 1.2000 + dst_frame[2] = src_y[0]; // duplicate last y 1.2001 + dst_frame[3] = src_v[0]; 1.2002 + } 1.2003 +} 1.2004 + 1.2005 +void I422ToUYVYRow_C(const uint8* src_y, 1.2006 + const uint8* src_u, 1.2007 + const uint8* src_v, 1.2008 + uint8* dst_frame, int width) { 1.2009 + int x; 1.2010 + for (x = 0; x < width - 1; x += 2) { 1.2011 + dst_frame[0] = src_u[0]; 1.2012 + dst_frame[1] = src_y[0]; 1.2013 + dst_frame[2] = src_v[0]; 1.2014 + dst_frame[3] = src_y[1]; 1.2015 + dst_frame += 4; 1.2016 + src_y += 2; 1.2017 + src_u += 1; 1.2018 + src_v += 1; 1.2019 + } 1.2020 + if (width & 1) { 1.2021 + dst_frame[0] = src_u[0]; 1.2022 + dst_frame[1] = src_y[0]; 1.2023 + dst_frame[2] = src_v[0]; 1.2024 + dst_frame[3] = src_y[0]; // duplicate last y 1.2025 + } 1.2026 +} 1.2027 + 1.2028 +#if !defined(LIBYUV_DISABLE_X86) && defined(HAS_I422TOARGBROW_SSSE3) 1.2029 +// row_win.cc has asm version, but GCC uses 2 step wrapper. 1.2030 +#if defined(__x86_64__) || defined(__i386__) 1.2031 +void I422ToRGB565Row_SSSE3(const uint8* src_y, 1.2032 + const uint8* src_u, 1.2033 + const uint8* src_v, 1.2034 + uint8* rgb_buf, 1.2035 + int width) { 1.2036 + // Allocate a row of ARGB. 1.2037 + align_buffer_64(row, width * 4); 1.2038 + I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width); 1.2039 + ARGBToRGB565Row_SSE2(row, rgb_buf, width); 1.2040 + free_aligned_buffer_64(row); 1.2041 +} 1.2042 +#endif // defined(__x86_64__) || defined(__i386__) 1.2043 + 1.2044 +#if defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) 1.2045 +void I422ToARGB1555Row_SSSE3(const uint8* src_y, 1.2046 + const uint8* src_u, 1.2047 + const uint8* src_v, 1.2048 + uint8* rgb_buf, 1.2049 + int width) { 1.2050 + // Allocate a row of ARGB. 1.2051 + align_buffer_64(row, width * 4); 1.2052 + I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width); 1.2053 + ARGBToARGB1555Row_SSE2(row, rgb_buf, width); 1.2054 + free_aligned_buffer_64(row); 1.2055 +} 1.2056 + 1.2057 +void I422ToARGB4444Row_SSSE3(const uint8* src_y, 1.2058 + const uint8* src_u, 1.2059 + const uint8* src_v, 1.2060 + uint8* rgb_buf, 1.2061 + int width) { 1.2062 + // Allocate a row of ARGB. 1.2063 + align_buffer_64(row, width * 4); 1.2064 + I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width); 1.2065 + ARGBToARGB4444Row_SSE2(row, rgb_buf, width); 1.2066 + free_aligned_buffer_64(row); 1.2067 +} 1.2068 + 1.2069 +void NV12ToRGB565Row_SSSE3(const uint8* src_y, 1.2070 + const uint8* src_uv, 1.2071 + uint8* dst_rgb565, 1.2072 + int width) { 1.2073 + // Allocate a row of ARGB. 1.2074 + align_buffer_64(row, width * 4); 1.2075 + NV12ToARGBRow_SSSE3(src_y, src_uv, row, width); 1.2076 + ARGBToRGB565Row_SSE2(row, dst_rgb565, width); 1.2077 + free_aligned_buffer_64(row); 1.2078 +} 1.2079 + 1.2080 +void NV21ToRGB565Row_SSSE3(const uint8* src_y, 1.2081 + const uint8* src_vu, 1.2082 + uint8* dst_rgb565, 1.2083 + int width) { 1.2084 + // Allocate a row of ARGB. 1.2085 + align_buffer_64(row, width * 4); 1.2086 + NV21ToARGBRow_SSSE3(src_y, src_vu, row, width); 1.2087 + ARGBToRGB565Row_SSE2(row, dst_rgb565, width); 1.2088 + free_aligned_buffer_64(row); 1.2089 +} 1.2090 + 1.2091 +void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, 1.2092 + uint8* dst_argb, 1.2093 + int width) { 1.2094 + // Allocate a rows of yuv. 1.2095 + align_buffer_64(row_y, ((width + 63) & ~63) * 2); 1.2096 + uint8* row_u = row_y + ((width + 63) & ~63); 1.2097 + uint8* row_v = row_u + ((width + 63) & ~63) / 2; 1.2098 + YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, width); 1.2099 + YUY2ToYRow_SSE2(src_yuy2, row_y, width); 1.2100 + I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width); 1.2101 + free_aligned_buffer_64(row_y); 1.2102 +} 1.2103 + 1.2104 +void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2, 1.2105 + uint8* dst_argb, 1.2106 + int width) { 1.2107 + // Allocate a rows of yuv. 1.2108 + align_buffer_64(row_y, ((width + 63) & ~63) * 2); 1.2109 + uint8* row_u = row_y + ((width + 63) & ~63); 1.2110 + uint8* row_v = row_u + ((width + 63) & ~63) / 2; 1.2111 + YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, row_u, row_v, width); 1.2112 + YUY2ToYRow_Unaligned_SSE2(src_yuy2, row_y, width); 1.2113 + I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width); 1.2114 + free_aligned_buffer_64(row_y); 1.2115 +} 1.2116 + 1.2117 +void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, 1.2118 + uint8* dst_argb, 1.2119 + int width) { 1.2120 + // Allocate a rows of yuv. 1.2121 + align_buffer_64(row_y, ((width + 63) & ~63) * 2); 1.2122 + uint8* row_u = row_y + ((width + 63) & ~63); 1.2123 + uint8* row_v = row_u + ((width + 63) & ~63) / 2; 1.2124 + UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, width); 1.2125 + UYVYToYRow_SSE2(src_uyvy, row_y, width); 1.2126 + I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width); 1.2127 + free_aligned_buffer_64(row_y); 1.2128 +} 1.2129 + 1.2130 +void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy, 1.2131 + uint8* dst_argb, 1.2132 + int width) { 1.2133 + // Allocate a rows of yuv. 1.2134 + align_buffer_64(row_y, ((width + 63) & ~63) * 2); 1.2135 + uint8* row_u = row_y + ((width + 63) & ~63); 1.2136 + uint8* row_v = row_u + ((width + 63) & ~63) / 2; 1.2137 + UYVYToUV422Row_Unaligned_SSE2(src_uyvy, row_u, row_v, width); 1.2138 + UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width); 1.2139 + I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width); 1.2140 + free_aligned_buffer_64(row_y); 1.2141 +} 1.2142 + 1.2143 +#endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) 1.2144 +#endif // !defined(LIBYUV_DISABLE_X86) 1.2145 + 1.2146 +void ARGBPolynomialRow_C(const uint8* src_argb, 1.2147 + uint8* dst_argb, const float* poly, 1.2148 + int width) { 1.2149 + int i; 1.2150 + for (i = 0; i < width; ++i) { 1.2151 + float b = (float)(src_argb[0]); 1.2152 + float g = (float)(src_argb[1]); 1.2153 + float r = (float)(src_argb[2]); 1.2154 + float a = (float)(src_argb[3]); 1.2155 + float b2 = b * b; 1.2156 + float g2 = g * g; 1.2157 + float r2 = r * r; 1.2158 + float a2 = a * a; 1.2159 + float db = poly[0] + poly[4] * b; 1.2160 + float dg = poly[1] + poly[5] * g; 1.2161 + float dr = poly[2] + poly[6] * r; 1.2162 + float da = poly[3] + poly[7] * a; 1.2163 + float b3 = b2 * b; 1.2164 + float g3 = g2 * g; 1.2165 + float r3 = r2 * r; 1.2166 + float a3 = a2 * a; 1.2167 + db += poly[8] * b2; 1.2168 + dg += poly[9] * g2; 1.2169 + dr += poly[10] * r2; 1.2170 + da += poly[11] * a2; 1.2171 + db += poly[12] * b3; 1.2172 + dg += poly[13] * g3; 1.2173 + dr += poly[14] * r3; 1.2174 + da += poly[15] * a3; 1.2175 + 1.2176 + dst_argb[0] = Clamp((int32)(db)); 1.2177 + dst_argb[1] = Clamp((int32)(dg)); 1.2178 + dst_argb[2] = Clamp((int32)(dr)); 1.2179 + dst_argb[3] = Clamp((int32)(da)); 1.2180 + src_argb += 4; 1.2181 + dst_argb += 4; 1.2182 + } 1.2183 +} 1.2184 + 1.2185 +void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width, 1.2186 + const uint8* luma, uint32 lumacoeff) { 1.2187 + uint32 bc = lumacoeff & 0xff; 1.2188 + uint32 gc = (lumacoeff >> 8) & 0xff; 1.2189 + uint32 rc = (lumacoeff >> 16) & 0xff; 1.2190 + 1.2191 + int i; 1.2192 + for (i = 0; i < width - 1; i += 2) { 1.2193 + // Luminance in rows, color values in columns. 1.2194 + const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc + 1.2195 + src_argb[2] * rc) & 0x7F00u) + luma; 1.2196 + const uint8* luma1; 1.2197 + dst_argb[0] = luma0[src_argb[0]]; 1.2198 + dst_argb[1] = luma0[src_argb[1]]; 1.2199 + dst_argb[2] = luma0[src_argb[2]]; 1.2200 + dst_argb[3] = src_argb[3]; 1.2201 + luma1 = ((src_argb[4] * bc + src_argb[5] * gc + 1.2202 + src_argb[6] * rc) & 0x7F00u) + luma; 1.2203 + dst_argb[4] = luma1[src_argb[4]]; 1.2204 + dst_argb[5] = luma1[src_argb[5]]; 1.2205 + dst_argb[6] = luma1[src_argb[6]]; 1.2206 + dst_argb[7] = src_argb[7]; 1.2207 + src_argb += 8; 1.2208 + dst_argb += 8; 1.2209 + } 1.2210 + if (width & 1) { 1.2211 + // Luminance in rows, color values in columns. 1.2212 + const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc + 1.2213 + src_argb[2] * rc) & 0x7F00u) + luma; 1.2214 + dst_argb[0] = luma0[src_argb[0]]; 1.2215 + dst_argb[1] = luma0[src_argb[1]]; 1.2216 + dst_argb[2] = luma0[src_argb[2]]; 1.2217 + dst_argb[3] = src_argb[3]; 1.2218 + } 1.2219 +} 1.2220 + 1.2221 +void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) { 1.2222 + int i; 1.2223 + for (i = 0; i < width - 1; i += 2) { 1.2224 + dst[3] = src[3]; 1.2225 + dst[7] = src[7]; 1.2226 + dst += 8; 1.2227 + src += 8; 1.2228 + } 1.2229 + if (width & 1) { 1.2230 + dst[3] = src[3]; 1.2231 + } 1.2232 +} 1.2233 + 1.2234 +void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) { 1.2235 + int i; 1.2236 + for (i = 0; i < width - 1; i += 2) { 1.2237 + dst[3] = src[0]; 1.2238 + dst[7] = src[1]; 1.2239 + dst += 8; 1.2240 + src += 2; 1.2241 + } 1.2242 + if (width & 1) { 1.2243 + dst[3] = src[0]; 1.2244 + } 1.2245 +} 1.2246 + 1.2247 +#ifdef __cplusplus 1.2248 +} // extern "C" 1.2249 +} // namespace libyuv 1.2250 +#endif