1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libyuv/source/scale_argb.cc Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,797 @@ 1.4 +/* 1.5 + * Copyright 2011 The LibYuv Project Authors. All rights reserved. 1.6 + * 1.7 + * Use of this source code is governed by a BSD-style license 1.8 + * that can be found in the LICENSE file in the root of the source 1.9 + * tree. An additional intellectual property rights grant can be found 1.10 + * in the file PATENTS. All contributing project authors may 1.11 + * be found in the AUTHORS file in the root of the source tree. 1.12 + */ 1.13 + 1.14 +#include "libyuv/scale.h" 1.15 + 1.16 +#include <assert.h> 1.17 +#include <string.h> 1.18 + 1.19 +#include "libyuv/cpu_id.h" 1.20 +#include "libyuv/planar_functions.h" // For CopyARGB 1.21 +#include "libyuv/row.h" 1.22 +#include "libyuv/scale_row.h" 1.23 + 1.24 +#ifdef __cplusplus 1.25 +namespace libyuv { 1.26 +extern "C" { 1.27 +#endif 1.28 + 1.29 +static __inline int Abs(int v) { 1.30 + return v >= 0 ? v : -v; 1.31 +} 1.32 + 1.33 +// ScaleARGB ARGB, 1/2 1.34 +// This is an optimized version for scaling down a ARGB to 1/2 of 1.35 +// its original size. 1.36 +static void ScaleARGBDown2(int src_width, int src_height, 1.37 + int dst_width, int dst_height, 1.38 + int src_stride, int dst_stride, 1.39 + const uint8* src_argb, uint8* dst_argb, 1.40 + int x, int dx, int y, int dy, 1.41 + enum FilterMode filtering) { 1.42 + int j; 1.43 + int row_stride = src_stride * (dy >> 16); 1.44 + void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride, 1.45 + uint8* dst_argb, int dst_width) = 1.46 + filtering == kFilterNone ? ScaleARGBRowDown2_C : 1.47 + (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C : 1.48 + ScaleARGBRowDown2Box_C); 1.49 + assert(dx == 65536 * 2); // Test scale factor of 2. 1.50 + assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2. 1.51 + // Advance to odd row, even column. 1.52 + if (filtering == kFilterBilinear) { 1.53 + src_argb += (y >> 16) * src_stride + (x >> 16) * 4; 1.54 + } else { 1.55 + src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4; 1.56 + } 1.57 + 1.58 +#if defined(HAS_SCALEARGBROWDOWN2_SSE2) 1.59 + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) && 1.60 + IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) && 1.61 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { 1.62 + ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 : 1.63 + (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 : 1.64 + ScaleARGBRowDown2Box_SSE2); 1.65 + } 1.66 +#elif defined(HAS_SCALEARGBROWDOWN2_NEON) 1.67 + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) && 1.68 + IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) { 1.69 + ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON : 1.70 + ScaleARGBRowDown2_NEON; 1.71 + } 1.72 +#endif 1.73 + 1.74 + if (filtering == kFilterLinear) { 1.75 + src_stride = 0; 1.76 + } 1.77 + for (j = 0; j < dst_height; ++j) { 1.78 + ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width); 1.79 + src_argb += row_stride; 1.80 + dst_argb += dst_stride; 1.81 + } 1.82 +} 1.83 + 1.84 +// ScaleARGB ARGB, 1/4 1.85 +// This is an optimized version for scaling down a ARGB to 1/4 of 1.86 +// its original size. 1.87 +static void ScaleARGBDown4Box(int src_width, int src_height, 1.88 + int dst_width, int dst_height, 1.89 + int src_stride, int dst_stride, 1.90 + const uint8* src_argb, uint8* dst_argb, 1.91 + int x, int dx, int y, int dy) { 1.92 + int j; 1.93 + // Allocate 2 rows of ARGB. 1.94 + const int kRowSize = (dst_width * 2 * 4 + 15) & ~15; 1.95 + align_buffer_64(row, kRowSize * 2); 1.96 + int row_stride = src_stride * (dy >> 16); 1.97 + void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride, 1.98 + uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C; 1.99 + // Advance to odd row, even column. 1.100 + src_argb += (y >> 16) * src_stride + (x >> 16) * 4; 1.101 + assert(dx == 65536 * 4); // Test scale factor of 4. 1.102 + assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4. 1.103 +#if defined(HAS_SCALEARGBROWDOWN2_SSE2) 1.104 + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) && 1.105 + IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) && 1.106 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { 1.107 + ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2; 1.108 + } 1.109 +#elif defined(HAS_SCALEARGBROWDOWN2_NEON) 1.110 + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) && 1.111 + IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) { 1.112 + ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON; 1.113 + } 1.114 +#endif 1.115 + for (j = 0; j < dst_height; ++j) { 1.116 + ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2); 1.117 + ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, 1.118 + row + kRowSize, dst_width * 2); 1.119 + ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width); 1.120 + src_argb += row_stride; 1.121 + dst_argb += dst_stride; 1.122 + } 1.123 + free_aligned_buffer_64(row); 1.124 +} 1.125 + 1.126 +// ScaleARGB ARGB Even 1.127 +// This is an optimized version for scaling down a ARGB to even 1.128 +// multiple of its original size. 1.129 +static void ScaleARGBDownEven(int src_width, int src_height, 1.130 + int dst_width, int dst_height, 1.131 + int src_stride, int dst_stride, 1.132 + const uint8* src_argb, uint8* dst_argb, 1.133 + int x, int dx, int y, int dy, 1.134 + enum FilterMode filtering) { 1.135 + int j; 1.136 + int col_step = dx >> 16; 1.137 + int row_stride = (dy >> 16) * src_stride; 1.138 + void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride, 1.139 + int src_step, uint8* dst_argb, int dst_width) = 1.140 + filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C; 1.141 + assert(IS_ALIGNED(src_width, 2)); 1.142 + assert(IS_ALIGNED(src_height, 2)); 1.143 + src_argb += (y >> 16) * src_stride + (x >> 16) * 4; 1.144 +#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) 1.145 + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) && 1.146 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { 1.147 + ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 : 1.148 + ScaleARGBRowDownEven_SSE2; 1.149 + } 1.150 +#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON) 1.151 + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) && 1.152 + IS_ALIGNED(src_argb, 4)) { 1.153 + ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON : 1.154 + ScaleARGBRowDownEven_NEON; 1.155 + } 1.156 +#endif 1.157 + 1.158 + if (filtering == kFilterLinear) { 1.159 + src_stride = 0; 1.160 + } 1.161 + for (j = 0; j < dst_height; ++j) { 1.162 + ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width); 1.163 + src_argb += row_stride; 1.164 + dst_argb += dst_stride; 1.165 + } 1.166 +} 1.167 + 1.168 +// Scale ARGB down with bilinear interpolation. 1.169 +static void ScaleARGBBilinearDown(int src_width, int src_height, 1.170 + int dst_width, int dst_height, 1.171 + int src_stride, int dst_stride, 1.172 + const uint8* src_argb, uint8* dst_argb, 1.173 + int x, int dx, int y, int dy, 1.174 + enum FilterMode filtering) { 1.175 + int j; 1.176 + int64 xlast = x + (int64)(dst_width - 1) * dx; 1.177 + int64 xl = (dx >= 0) ? x : xlast; 1.178 + int64 xr = (dx >= 0) ? xlast : x; 1.179 + int clip_src_width; 1.180 + xl = (xl >> 16) & ~3; // Left edge aligned. 1.181 + xr = (xr >> 16) + 1; // Right most pixel used. 1.182 + clip_src_width = (((xr - xl) + 1 + 3) & ~3) * 4; // Width aligned to 4. 1.183 + src_argb += xl * 4; 1.184 + x -= (int)(xl << 16); 1.185 + void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, 1.186 + ptrdiff_t src_stride, int dst_width, int source_y_fraction) = 1.187 + InterpolateRow_C; 1.188 +#if defined(HAS_INTERPOLATEROW_SSE2) 1.189 + if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) { 1.190 + InterpolateRow = InterpolateRow_Any_SSE2; 1.191 + if (IS_ALIGNED(clip_src_width, 16)) { 1.192 + InterpolateRow = InterpolateRow_Unaligned_SSE2; 1.193 + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) { 1.194 + InterpolateRow = InterpolateRow_SSE2; 1.195 + } 1.196 + } 1.197 + } 1.198 +#endif 1.199 +#if defined(HAS_INTERPOLATEROW_SSSE3) 1.200 + if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) { 1.201 + InterpolateRow = InterpolateRow_Any_SSSE3; 1.202 + if (IS_ALIGNED(clip_src_width, 16)) { 1.203 + InterpolateRow = InterpolateRow_Unaligned_SSSE3; 1.204 + if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) { 1.205 + InterpolateRow = InterpolateRow_SSSE3; 1.206 + } 1.207 + } 1.208 + } 1.209 +#endif 1.210 +#if defined(HAS_INTERPOLATEROW_AVX2) 1.211 + if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) { 1.212 + InterpolateRow = InterpolateRow_Any_AVX2; 1.213 + if (IS_ALIGNED(clip_src_width, 32)) { 1.214 + InterpolateRow = InterpolateRow_AVX2; 1.215 + } 1.216 + } 1.217 +#endif 1.218 +#if defined(HAS_INTERPOLATEROW_NEON) 1.219 + if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) { 1.220 + InterpolateRow = InterpolateRow_Any_NEON; 1.221 + if (IS_ALIGNED(clip_src_width, 16)) { 1.222 + InterpolateRow = InterpolateRow_NEON; 1.223 + } 1.224 + } 1.225 +#endif 1.226 +#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) 1.227 + if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 && 1.228 + IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) { 1.229 + InterpolateRow = InterpolateRow_Any_MIPS_DSPR2; 1.230 + if (IS_ALIGNED(clip_src_width, 4)) { 1.231 + InterpolateRow = InterpolateRow_MIPS_DSPR2; 1.232 + } 1.233 + } 1.234 +#endif 1.235 + void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, 1.236 + int dst_width, int x, int dx) = 1.237 + (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C; 1.238 +#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) 1.239 + if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { 1.240 + ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; 1.241 + } 1.242 +#endif 1.243 + // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. 1.244 + // Allocate a row of ARGB. 1.245 + align_buffer_64(row, clip_src_width * 4); 1.246 + 1.247 + const int max_y = (src_height - 1) << 16; 1.248 + for (j = 0; j < dst_height; ++j) { 1.249 + if (y > max_y) { 1.250 + y = max_y; 1.251 + } 1.252 + int yi = y >> 16; 1.253 + const uint8* src = src_argb + yi * src_stride; 1.254 + if (filtering == kFilterLinear) { 1.255 + ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx); 1.256 + } else { 1.257 + int yf = (y >> 8) & 255; 1.258 + InterpolateRow(row, src, src_stride, clip_src_width, yf); 1.259 + ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx); 1.260 + } 1.261 + dst_argb += dst_stride; 1.262 + y += dy; 1.263 + } 1.264 + free_aligned_buffer_64(row); 1.265 +} 1.266 + 1.267 +// Scale ARGB up with bilinear interpolation. 1.268 +static void ScaleARGBBilinearUp(int src_width, int src_height, 1.269 + int dst_width, int dst_height, 1.270 + int src_stride, int dst_stride, 1.271 + const uint8* src_argb, uint8* dst_argb, 1.272 + int x, int dx, int y, int dy, 1.273 + enum FilterMode filtering) { 1.274 + int j; 1.275 + void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, 1.276 + ptrdiff_t src_stride, int dst_width, int source_y_fraction) = 1.277 + InterpolateRow_C; 1.278 + void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, 1.279 + int dst_width, int x, int dx) = 1.280 + filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; 1.281 +#if defined(HAS_INTERPOLATEROW_SSE2) 1.282 + if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) { 1.283 + InterpolateRow = InterpolateRow_Any_SSE2; 1.284 + if (IS_ALIGNED(dst_width, 4)) { 1.285 + InterpolateRow = InterpolateRow_Unaligned_SSE2; 1.286 + if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { 1.287 + InterpolateRow = InterpolateRow_SSE2; 1.288 + } 1.289 + } 1.290 + } 1.291 +#endif 1.292 +#if defined(HAS_INTERPOLATEROW_SSSE3) 1.293 + if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) { 1.294 + InterpolateRow = InterpolateRow_Any_SSSE3; 1.295 + if (IS_ALIGNED(dst_width, 4)) { 1.296 + InterpolateRow = InterpolateRow_Unaligned_SSSE3; 1.297 + if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { 1.298 + InterpolateRow = InterpolateRow_SSSE3; 1.299 + } 1.300 + } 1.301 + } 1.302 +#endif 1.303 +#if defined(HAS_INTERPOLATEROW_AVX2) 1.304 + if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) { 1.305 + InterpolateRow = InterpolateRow_Any_AVX2; 1.306 + if (IS_ALIGNED(dst_width, 8)) { 1.307 + InterpolateRow = InterpolateRow_AVX2; 1.308 + } 1.309 + } 1.310 +#endif 1.311 +#if defined(HAS_INTERPOLATEROW_NEON) 1.312 + if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) { 1.313 + InterpolateRow = InterpolateRow_Any_NEON; 1.314 + if (IS_ALIGNED(dst_width, 4)) { 1.315 + InterpolateRow = InterpolateRow_NEON; 1.316 + } 1.317 + } 1.318 +#endif 1.319 +#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) 1.320 + if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 && 1.321 + IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) { 1.322 + InterpolateRow = InterpolateRow_MIPS_DSPR2; 1.323 + } 1.324 +#endif 1.325 + if (src_width >= 32768) { 1.326 + ScaleARGBFilterCols = filtering ? 1.327 + ScaleARGBFilterCols64_C : ScaleARGBCols64_C; 1.328 + } 1.329 +#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) 1.330 + if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { 1.331 + ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; 1.332 + } 1.333 +#endif 1.334 +#if defined(HAS_SCALEARGBCOLS_SSE2) 1.335 + if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { 1.336 + ScaleARGBFilterCols = ScaleARGBCols_SSE2; 1.337 + } 1.338 +#endif 1.339 + if (!filtering && src_width * 2 == dst_width && x < 0x8000) { 1.340 + ScaleARGBFilterCols = ScaleARGBColsUp2_C; 1.341 +#if defined(HAS_SCALEARGBCOLSUP2_SSE2) 1.342 + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && 1.343 + IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && 1.344 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { 1.345 + ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; 1.346 + } 1.347 +#endif 1.348 + } 1.349 + 1.350 + const int max_y = (src_height - 1) << 16; 1.351 + if (y > max_y) { 1.352 + y = max_y; 1.353 + } 1.354 + int yi = y >> 16; 1.355 + const uint8* src = src_argb + yi * src_stride; 1.356 + 1.357 + // Allocate 2 rows of ARGB. 1.358 + const int kRowSize = (dst_width * 4 + 15) & ~15; 1.359 + align_buffer_64(row, kRowSize * 2); 1.360 + 1.361 + uint8* rowptr = row; 1.362 + int rowstride = kRowSize; 1.363 + int lasty = yi; 1.364 + 1.365 + ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); 1.366 + if (src_height > 1) { 1.367 + src += src_stride; 1.368 + } 1.369 + ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx); 1.370 + src += src_stride; 1.371 + 1.372 + for (j = 0; j < dst_height; ++j) { 1.373 + yi = y >> 16; 1.374 + if (yi != lasty) { 1.375 + if (y > max_y) { 1.376 + y = max_y; 1.377 + yi = y >> 16; 1.378 + src = src_argb + yi * src_stride; 1.379 + } 1.380 + if (yi != lasty) { 1.381 + ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); 1.382 + rowptr += rowstride; 1.383 + rowstride = -rowstride; 1.384 + lasty = yi; 1.385 + src += src_stride; 1.386 + } 1.387 + } 1.388 + if (filtering == kFilterLinear) { 1.389 + InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0); 1.390 + } else { 1.391 + int yf = (y >> 8) & 255; 1.392 + InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf); 1.393 + } 1.394 + dst_argb += dst_stride; 1.395 + y += dy; 1.396 + } 1.397 + free_aligned_buffer_64(row); 1.398 +} 1.399 + 1.400 +#ifdef YUVSCALEUP 1.401 +// Scale YUV to ARGB up with bilinear interpolation. 1.402 +static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, 1.403 + int dst_width, int dst_height, 1.404 + int src_stride_y, 1.405 + int src_stride_u, 1.406 + int src_stride_v, 1.407 + int dst_stride_argb, 1.408 + const uint8* src_y, 1.409 + const uint8* src_u, 1.410 + const uint8* src_v, 1.411 + uint8* dst_argb, 1.412 + int x, int dx, int y, int dy, 1.413 + enum FilterMode filtering) { 1.414 + int j; 1.415 + void (*I422ToARGBRow)(const uint8* y_buf, 1.416 + const uint8* u_buf, 1.417 + const uint8* v_buf, 1.418 + uint8* rgb_buf, 1.419 + int width) = I422ToARGBRow_C; 1.420 +#if defined(HAS_I422TOARGBROW_SSSE3) 1.421 + if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) { 1.422 + I422ToARGBRow = I422ToARGBRow_Any_SSSE3; 1.423 + if (IS_ALIGNED(src_width, 8)) { 1.424 + I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3; 1.425 + if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1.426 + I422ToARGBRow = I422ToARGBRow_SSSE3; 1.427 + } 1.428 + } 1.429 + } 1.430 +#endif 1.431 +#if defined(HAS_I422TOARGBROW_AVX2) 1.432 + if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) { 1.433 + I422ToARGBRow = I422ToARGBRow_Any_AVX2; 1.434 + if (IS_ALIGNED(src_width, 16)) { 1.435 + I422ToARGBRow = I422ToARGBRow_AVX2; 1.436 + } 1.437 + } 1.438 +#endif 1.439 +#if defined(HAS_I422TOARGBROW_NEON) 1.440 + if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) { 1.441 + I422ToARGBRow = I422ToARGBRow_Any_NEON; 1.442 + if (IS_ALIGNED(src_width, 8)) { 1.443 + I422ToARGBRow = I422ToARGBRow_NEON; 1.444 + } 1.445 + } 1.446 +#endif 1.447 +#if defined(HAS_I422TOARGBROW_MIPS_DSPR2) 1.448 + if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) && 1.449 + IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && 1.450 + IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && 1.451 + IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && 1.452 + IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { 1.453 + I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; 1.454 + } 1.455 +#endif 1.456 + 1.457 + void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, 1.458 + ptrdiff_t src_stride, int dst_width, int source_y_fraction) = 1.459 + InterpolateRow_C; 1.460 +#if defined(HAS_INTERPOLATEROW_SSE2) 1.461 + if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) { 1.462 + InterpolateRow = InterpolateRow_Any_SSE2; 1.463 + if (IS_ALIGNED(dst_width, 4)) { 1.464 + InterpolateRow = InterpolateRow_Unaligned_SSE2; 1.465 + if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1.466 + InterpolateRow = InterpolateRow_SSE2; 1.467 + } 1.468 + } 1.469 + } 1.470 +#endif 1.471 +#if defined(HAS_INTERPOLATEROW_SSSE3) 1.472 + if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) { 1.473 + InterpolateRow = InterpolateRow_Any_SSSE3; 1.474 + if (IS_ALIGNED(dst_width, 4)) { 1.475 + InterpolateRow = InterpolateRow_Unaligned_SSSE3; 1.476 + if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { 1.477 + InterpolateRow = InterpolateRow_SSSE3; 1.478 + } 1.479 + } 1.480 + } 1.481 +#endif 1.482 +#if defined(HAS_INTERPOLATEROW_AVX2) 1.483 + if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) { 1.484 + InterpolateRow = InterpolateRow_Any_AVX2; 1.485 + if (IS_ALIGNED(dst_width, 8)) { 1.486 + InterpolateRow = InterpolateRow_AVX2; 1.487 + } 1.488 + } 1.489 +#endif 1.490 +#if defined(HAS_INTERPOLATEROW_NEON) 1.491 + if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) { 1.492 + InterpolateRow = InterpolateRow_Any_NEON; 1.493 + if (IS_ALIGNED(dst_width, 4)) { 1.494 + InterpolateRow = InterpolateRow_NEON; 1.495 + } 1.496 + } 1.497 +#endif 1.498 +#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) 1.499 + if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 && 1.500 + IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { 1.501 + InterpolateRow = InterpolateRow_MIPS_DSPR2; 1.502 + } 1.503 +#endif 1.504 + 1.505 + void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, 1.506 + int dst_width, int x, int dx) = 1.507 + filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; 1.508 + if (src_width >= 32768) { 1.509 + ScaleARGBFilterCols = filtering ? 1.510 + ScaleARGBFilterCols64_C : ScaleARGBCols64_C; 1.511 + } 1.512 +#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) 1.513 + if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { 1.514 + ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; 1.515 + } 1.516 +#endif 1.517 +#if defined(HAS_SCALEARGBCOLS_SSE2) 1.518 + if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { 1.519 + ScaleARGBFilterCols = ScaleARGBCols_SSE2; 1.520 + } 1.521 +#endif 1.522 + if (!filtering && src_width * 2 == dst_width && x < 0x8000) { 1.523 + ScaleARGBFilterCols = ScaleARGBColsUp2_C; 1.524 +#if defined(HAS_SCALEARGBCOLSUP2_SSE2) 1.525 + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && 1.526 + IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && 1.527 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { 1.528 + ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; 1.529 + } 1.530 +#endif 1.531 + } 1.532 + 1.533 + const int max_y = (src_height - 1) << 16; 1.534 + if (y > max_y) { 1.535 + y = max_y; 1.536 + } 1.537 + const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate. 1.538 + int yi = y >> 16; 1.539 + int uv_yi = yi >> kYShift; 1.540 + const uint8* src_row_y = src_y + yi * src_stride_y; 1.541 + const uint8* src_row_u = src_u + uv_yi * src_stride_u; 1.542 + const uint8* src_row_v = src_v + uv_yi * src_stride_v; 1.543 + 1.544 + // Allocate 2 rows of ARGB. 1.545 + const int kRowSize = (dst_width * 4 + 15) & ~15; 1.546 + align_buffer_64(row, kRowSize * 2); 1.547 + 1.548 + // Allocate 1 row of ARGB for source conversion. 1.549 + align_buffer_64(argb_row, src_width * 4); 1.550 + 1.551 + uint8* rowptr = row; 1.552 + int rowstride = kRowSize; 1.553 + int lasty = yi; 1.554 + 1.555 + // TODO(fbarchard): Convert first 2 rows of YUV to ARGB. 1.556 + ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx); 1.557 + if (src_height > 1) { 1.558 + src_row_y += src_stride_y; 1.559 + if (yi & 1) { 1.560 + src_row_u += src_stride_u; 1.561 + src_row_v += src_stride_v; 1.562 + } 1.563 + } 1.564 + ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx); 1.565 + if (src_height > 2) { 1.566 + src_row_y += src_stride_y; 1.567 + if (!(yi & 1)) { 1.568 + src_row_u += src_stride_u; 1.569 + src_row_v += src_stride_v; 1.570 + } 1.571 + } 1.572 + 1.573 + for (j = 0; j < dst_height; ++j) { 1.574 + yi = y >> 16; 1.575 + if (yi != lasty) { 1.576 + if (y > max_y) { 1.577 + y = max_y; 1.578 + yi = y >> 16; 1.579 + uv_yi = yi >> kYShift; 1.580 + src_row_y = src_y + yi * src_stride_y; 1.581 + src_row_u = src_u + uv_yi * src_stride_u; 1.582 + src_row_v = src_v + uv_yi * src_stride_v; 1.583 + } 1.584 + if (yi != lasty) { 1.585 + // TODO(fbarchard): Convert the clipped region of row. 1.586 + I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width); 1.587 + ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx); 1.588 + rowptr += rowstride; 1.589 + rowstride = -rowstride; 1.590 + lasty = yi; 1.591 + src_row_y += src_stride_y; 1.592 + if (yi & 1) { 1.593 + src_row_u += src_stride_u; 1.594 + src_row_v += src_stride_v; 1.595 + } 1.596 + } 1.597 + } 1.598 + if (filtering == kFilterLinear) { 1.599 + InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0); 1.600 + } else { 1.601 + int yf = (y >> 8) & 255; 1.602 + InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf); 1.603 + } 1.604 + dst_argb += dst_stride_argb; 1.605 + y += dy; 1.606 + } 1.607 + free_aligned_buffer_64(row); 1.608 + free_aligned_buffer_64(row_argb); 1.609 +} 1.610 +#endif 1.611 + 1.612 +// Scale ARGB to/from any dimensions, without interpolation. 1.613 +// Fixed point math is used for performance: The upper 16 bits 1.614 +// of x and dx is the integer part of the source position and 1.615 +// the lower 16 bits are the fixed decimal part. 1.616 + 1.617 +static void ScaleARGBSimple(int src_width, int src_height, 1.618 + int dst_width, int dst_height, 1.619 + int src_stride, int dst_stride, 1.620 + const uint8* src_argb, uint8* dst_argb, 1.621 + int x, int dx, int y, int dy) { 1.622 + int j; 1.623 + void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb, 1.624 + int dst_width, int x, int dx) = 1.625 + (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C; 1.626 +#if defined(HAS_SCALEARGBCOLS_SSE2) 1.627 + if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { 1.628 + ScaleARGBCols = ScaleARGBCols_SSE2; 1.629 + } 1.630 +#endif 1.631 + if (src_width * 2 == dst_width && x < 0x8000) { 1.632 + ScaleARGBCols = ScaleARGBColsUp2_C; 1.633 +#if defined(HAS_SCALEARGBCOLSUP2_SSE2) 1.634 + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && 1.635 + IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && 1.636 + IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { 1.637 + ScaleARGBCols = ScaleARGBColsUp2_SSE2; 1.638 + } 1.639 +#endif 1.640 + } 1.641 + 1.642 + for (j = 0; j < dst_height; ++j) { 1.643 + ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, 1.644 + dst_width, x, dx); 1.645 + dst_argb += dst_stride; 1.646 + y += dy; 1.647 + } 1.648 +} 1.649 + 1.650 +// ScaleARGB a ARGB. 1.651 +// This function in turn calls a scaling function 1.652 +// suitable for handling the desired resolutions. 1.653 +static void ScaleARGB(const uint8* src, int src_stride, 1.654 + int src_width, int src_height, 1.655 + uint8* dst, int dst_stride, 1.656 + int dst_width, int dst_height, 1.657 + int clip_x, int clip_y, int clip_width, int clip_height, 1.658 + enum FilterMode filtering) { 1.659 + // Initial source x/y coordinate and step values as 16.16 fixed point. 1.660 + int x = 0; 1.661 + int y = 0; 1.662 + int dx = 0; 1.663 + int dy = 0; 1.664 + // ARGB does not support box filter yet, but allow the user to pass it. 1.665 + // Simplify filtering when possible. 1.666 + filtering = ScaleFilterReduce(src_width, src_height, 1.667 + dst_width, dst_height, 1.668 + filtering); 1.669 + 1.670 + // Negative src_height means invert the image. 1.671 + if (src_height < 0) { 1.672 + src_height = -src_height; 1.673 + src = src + (src_height - 1) * src_stride; 1.674 + src_stride = -src_stride; 1.675 + } 1.676 + ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, 1.677 + &x, &y, &dx, &dy); 1.678 + src_width = Abs(src_width); 1.679 + if (clip_x) { 1.680 + int64 clipf = (int64)(clip_x) * dx; 1.681 + x += (clipf & 0xffff); 1.682 + src += (clipf >> 16) * 4; 1.683 + dst += clip_x * 4; 1.684 + } 1.685 + if (clip_y) { 1.686 + int64 clipf = (int64)(clip_y) * dy; 1.687 + y += (clipf & 0xffff); 1.688 + src += (clipf >> 16) * src_stride; 1.689 + dst += clip_y * dst_stride; 1.690 + } 1.691 + 1.692 + // Special case for integer step values. 1.693 + if (((dx | dy) & 0xffff) == 0) { 1.694 + if (!dx || !dy) { // 1 pixel wide and/or tall. 1.695 + filtering = kFilterNone; 1.696 + } else { 1.697 + // Optimized even scale down. ie 2, 4, 6, 8, 10x. 1.698 + if (!(dx & 0x10000) && !(dy & 0x10000)) { 1.699 + if (dx == 0x20000) { 1.700 + // Optimized 1/2 downsample. 1.701 + ScaleARGBDown2(src_width, src_height, 1.702 + clip_width, clip_height, 1.703 + src_stride, dst_stride, src, dst, 1.704 + x, dx, y, dy, filtering); 1.705 + return; 1.706 + } 1.707 + if (dx == 0x40000 && filtering == kFilterBox) { 1.708 + // Optimized 1/4 box downsample. 1.709 + ScaleARGBDown4Box(src_width, src_height, 1.710 + clip_width, clip_height, 1.711 + src_stride, dst_stride, src, dst, 1.712 + x, dx, y, dy); 1.713 + return; 1.714 + } 1.715 + ScaleARGBDownEven(src_width, src_height, 1.716 + clip_width, clip_height, 1.717 + src_stride, dst_stride, src, dst, 1.718 + x, dx, y, dy, filtering); 1.719 + return; 1.720 + } 1.721 + // Optimized odd scale down. ie 3, 5, 7, 9x. 1.722 + if ((dx & 0x10000) && (dy & 0x10000)) { 1.723 + filtering = kFilterNone; 1.724 + if (dx == 0x10000 && dy == 0x10000) { 1.725 + // Straight copy. 1.726 + ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride, 1.727 + dst, dst_stride, clip_width, clip_height); 1.728 + return; 1.729 + } 1.730 + } 1.731 + } 1.732 + } 1.733 + if (dx == 0x10000 && (x & 0xffff) == 0) { 1.734 + // Arbitrary scale vertically, but unscaled vertically. 1.735 + ScalePlaneVertical(src_height, 1.736 + clip_width, clip_height, 1.737 + src_stride, dst_stride, src, dst, 1.738 + x, y, dy, 4, filtering); 1.739 + return; 1.740 + } 1.741 + if (filtering && dy < 65536) { 1.742 + ScaleARGBBilinearUp(src_width, src_height, 1.743 + clip_width, clip_height, 1.744 + src_stride, dst_stride, src, dst, 1.745 + x, dx, y, dy, filtering); 1.746 + return; 1.747 + } 1.748 + if (filtering) { 1.749 + ScaleARGBBilinearDown(src_width, src_height, 1.750 + clip_width, clip_height, 1.751 + src_stride, dst_stride, src, dst, 1.752 + x, dx, y, dy, filtering); 1.753 + return; 1.754 + } 1.755 + ScaleARGBSimple(src_width, src_height, clip_width, clip_height, 1.756 + src_stride, dst_stride, src, dst, 1.757 + x, dx, y, dy); 1.758 +} 1.759 + 1.760 +LIBYUV_API 1.761 +int ARGBScaleClip(const uint8* src_argb, int src_stride_argb, 1.762 + int src_width, int src_height, 1.763 + uint8* dst_argb, int dst_stride_argb, 1.764 + int dst_width, int dst_height, 1.765 + int clip_x, int clip_y, int clip_width, int clip_height, 1.766 + enum FilterMode filtering) { 1.767 + if (!src_argb || src_width == 0 || src_height == 0 || 1.768 + !dst_argb || dst_width <= 0 || dst_height <= 0 || 1.769 + clip_x < 0 || clip_y < 0 || 1.770 + (clip_x + clip_width) > dst_width || 1.771 + (clip_y + clip_height) > dst_height) { 1.772 + return -1; 1.773 + } 1.774 + ScaleARGB(src_argb, src_stride_argb, src_width, src_height, 1.775 + dst_argb, dst_stride_argb, dst_width, dst_height, 1.776 + clip_x, clip_y, clip_width, clip_height, filtering); 1.777 + return 0; 1.778 +} 1.779 + 1.780 +// Scale an ARGB image. 1.781 +LIBYUV_API 1.782 +int ARGBScale(const uint8* src_argb, int src_stride_argb, 1.783 + int src_width, int src_height, 1.784 + uint8* dst_argb, int dst_stride_argb, 1.785 + int dst_width, int dst_height, 1.786 + enum FilterMode filtering) { 1.787 + if (!src_argb || src_width == 0 || src_height == 0 || 1.788 + !dst_argb || dst_width <= 0 || dst_height <= 0) { 1.789 + return -1; 1.790 + } 1.791 + ScaleARGB(src_argb, src_stride_argb, src_width, src_height, 1.792 + dst_argb, dst_stride_argb, dst_width, dst_height, 1.793 + 0, 0, dst_width, dst_height, filtering); 1.794 + return 0; 1.795 +} 1.796 + 1.797 +#ifdef __cplusplus 1.798 +} // extern "C" 1.799 +} // namespace libyuv 1.800 +#endif