media/libyuv/source/scale_argb.cc

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /*
michael@0 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
michael@0 3 *
michael@0 4 * Use of this source code is governed by a BSD-style license
michael@0 5 * that can be found in the LICENSE file in the root of the source
michael@0 6 * tree. An additional intellectual property rights grant can be found
michael@0 7 * in the file PATENTS. All contributing project authors may
michael@0 8 * be found in the AUTHORS file in the root of the source tree.
michael@0 9 */
michael@0 10
michael@0 11 #include "libyuv/scale.h"
michael@0 12
michael@0 13 #include <assert.h>
michael@0 14 #include <string.h>
michael@0 15
michael@0 16 #include "libyuv/cpu_id.h"
michael@0 17 #include "libyuv/planar_functions.h" // For CopyARGB
michael@0 18 #include "libyuv/row.h"
michael@0 19 #include "libyuv/scale_row.h"
michael@0 20
michael@0 21 #ifdef __cplusplus
michael@0 22 namespace libyuv {
michael@0 23 extern "C" {
michael@0 24 #endif
michael@0 25
// Returns the absolute value of |v|.
// The most negative int has no positive counterpart, so negating it would be
// signed overflow (undefined behavior). That single input saturates to the
// largest positive int instead; every other input behaves as before.
static __inline int Abs(int v) {
  const int kIntMax = (int)(~0u >> 1);  // Largest int, without <limits.h>.
  if (v >= 0) {
    return v;
  }
  return v < -kIntMax ? kIntMax : -v;
}
michael@0 29
michael@0 30 // ScaleARGB ARGB, 1/2
michael@0 31 // This is an optimized version for scaling down a ARGB to 1/2 of
michael@0 32 // its original size.
michael@0 33 static void ScaleARGBDown2(int src_width, int src_height,
michael@0 34 int dst_width, int dst_height,
michael@0 35 int src_stride, int dst_stride,
michael@0 36 const uint8* src_argb, uint8* dst_argb,
michael@0 37 int x, int dx, int y, int dy,
michael@0 38 enum FilterMode filtering) {
michael@0 39 int j;
michael@0 40 int row_stride = src_stride * (dy >> 16);
michael@0 41 void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
michael@0 42 uint8* dst_argb, int dst_width) =
michael@0 43 filtering == kFilterNone ? ScaleARGBRowDown2_C :
michael@0 44 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
michael@0 45 ScaleARGBRowDown2Box_C);
michael@0 46 assert(dx == 65536 * 2); // Test scale factor of 2.
michael@0 47 assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
michael@0 48 // Advance to odd row, even column.
michael@0 49 if (filtering == kFilterBilinear) {
michael@0 50 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
michael@0 51 } else {
michael@0 52 src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
michael@0 53 }
michael@0 54
michael@0 55 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
michael@0 56 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
michael@0 57 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
michael@0 58 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
michael@0 59 ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
michael@0 60 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
michael@0 61 ScaleARGBRowDown2Box_SSE2);
michael@0 62 }
michael@0 63 #elif defined(HAS_SCALEARGBROWDOWN2_NEON)
michael@0 64 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
michael@0 65 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
michael@0 66 ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON :
michael@0 67 ScaleARGBRowDown2_NEON;
michael@0 68 }
michael@0 69 #endif
michael@0 70
michael@0 71 if (filtering == kFilterLinear) {
michael@0 72 src_stride = 0;
michael@0 73 }
michael@0 74 for (j = 0; j < dst_height; ++j) {
michael@0 75 ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
michael@0 76 src_argb += row_stride;
michael@0 77 dst_argb += dst_stride;
michael@0 78 }
michael@0 79 }
michael@0 80
michael@0 81 // ScaleARGB ARGB, 1/4
michael@0 82 // This is an optimized version for scaling down a ARGB to 1/4 of
michael@0 83 // its original size.
michael@0 84 static void ScaleARGBDown4Box(int src_width, int src_height,
michael@0 85 int dst_width, int dst_height,
michael@0 86 int src_stride, int dst_stride,
michael@0 87 const uint8* src_argb, uint8* dst_argb,
michael@0 88 int x, int dx, int y, int dy) {
michael@0 89 int j;
michael@0 90 // Allocate 2 rows of ARGB.
michael@0 91 const int kRowSize = (dst_width * 2 * 4 + 15) & ~15;
michael@0 92 align_buffer_64(row, kRowSize * 2);
michael@0 93 int row_stride = src_stride * (dy >> 16);
michael@0 94 void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
michael@0 95 uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C;
michael@0 96 // Advance to odd row, even column.
michael@0 97 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
michael@0 98 assert(dx == 65536 * 4); // Test scale factor of 4.
michael@0 99 assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
michael@0 100 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
michael@0 101 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
michael@0 102 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
michael@0 103 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
michael@0 104 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
michael@0 105 }
michael@0 106 #elif defined(HAS_SCALEARGBROWDOWN2_NEON)
michael@0 107 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
michael@0 108 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
michael@0 109 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
michael@0 110 }
michael@0 111 #endif
michael@0 112 for (j = 0; j < dst_height; ++j) {
michael@0 113 ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
michael@0 114 ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
michael@0 115 row + kRowSize, dst_width * 2);
michael@0 116 ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
michael@0 117 src_argb += row_stride;
michael@0 118 dst_argb += dst_stride;
michael@0 119 }
michael@0 120 free_aligned_buffer_64(row);
michael@0 121 }
michael@0 122
michael@0 123 // ScaleARGB ARGB Even
michael@0 124 // This is an optimized version for scaling down a ARGB to even
michael@0 125 // multiple of its original size.
michael@0 126 static void ScaleARGBDownEven(int src_width, int src_height,
michael@0 127 int dst_width, int dst_height,
michael@0 128 int src_stride, int dst_stride,
michael@0 129 const uint8* src_argb, uint8* dst_argb,
michael@0 130 int x, int dx, int y, int dy,
michael@0 131 enum FilterMode filtering) {
michael@0 132 int j;
michael@0 133 int col_step = dx >> 16;
michael@0 134 int row_stride = (dy >> 16) * src_stride;
michael@0 135 void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
michael@0 136 int src_step, uint8* dst_argb, int dst_width) =
michael@0 137 filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
michael@0 138 assert(IS_ALIGNED(src_width, 2));
michael@0 139 assert(IS_ALIGNED(src_height, 2));
michael@0 140 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
michael@0 141 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
michael@0 142 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
michael@0 143 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
michael@0 144 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
michael@0 145 ScaleARGBRowDownEven_SSE2;
michael@0 146 }
michael@0 147 #elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
michael@0 148 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) &&
michael@0 149 IS_ALIGNED(src_argb, 4)) {
michael@0 150 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
michael@0 151 ScaleARGBRowDownEven_NEON;
michael@0 152 }
michael@0 153 #endif
michael@0 154
michael@0 155 if (filtering == kFilterLinear) {
michael@0 156 src_stride = 0;
michael@0 157 }
michael@0 158 for (j = 0; j < dst_height; ++j) {
michael@0 159 ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
michael@0 160 src_argb += row_stride;
michael@0 161 dst_argb += dst_stride;
michael@0 162 }
michael@0 163 }
michael@0 164
michael@0 165 // Scale ARGB down with bilinear interpolation.
michael@0 166 static void ScaleARGBBilinearDown(int src_width, int src_height,
michael@0 167 int dst_width, int dst_height,
michael@0 168 int src_stride, int dst_stride,
michael@0 169 const uint8* src_argb, uint8* dst_argb,
michael@0 170 int x, int dx, int y, int dy,
michael@0 171 enum FilterMode filtering) {
michael@0 172 int j;
michael@0 173 int64 xlast = x + (int64)(dst_width - 1) * dx;
michael@0 174 int64 xl = (dx >= 0) ? x : xlast;
michael@0 175 int64 xr = (dx >= 0) ? xlast : x;
michael@0 176 int clip_src_width;
michael@0 177 xl = (xl >> 16) & ~3; // Left edge aligned.
michael@0 178 xr = (xr >> 16) + 1; // Right most pixel used.
michael@0 179 clip_src_width = (((xr - xl) + 1 + 3) & ~3) * 4; // Width aligned to 4.
michael@0 180 src_argb += xl * 4;
michael@0 181 x -= (int)(xl << 16);
michael@0 182 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
michael@0 183 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
michael@0 184 InterpolateRow_C;
michael@0 185 #if defined(HAS_INTERPOLATEROW_SSE2)
michael@0 186 if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) {
michael@0 187 InterpolateRow = InterpolateRow_Any_SSE2;
michael@0 188 if (IS_ALIGNED(clip_src_width, 16)) {
michael@0 189 InterpolateRow = InterpolateRow_Unaligned_SSE2;
michael@0 190 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
michael@0 191 InterpolateRow = InterpolateRow_SSE2;
michael@0 192 }
michael@0 193 }
michael@0 194 }
michael@0 195 #endif
michael@0 196 #if defined(HAS_INTERPOLATEROW_SSSE3)
michael@0 197 if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) {
michael@0 198 InterpolateRow = InterpolateRow_Any_SSSE3;
michael@0 199 if (IS_ALIGNED(clip_src_width, 16)) {
michael@0 200 InterpolateRow = InterpolateRow_Unaligned_SSSE3;
michael@0 201 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
michael@0 202 InterpolateRow = InterpolateRow_SSSE3;
michael@0 203 }
michael@0 204 }
michael@0 205 }
michael@0 206 #endif
michael@0 207 #if defined(HAS_INTERPOLATEROW_AVX2)
michael@0 208 if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) {
michael@0 209 InterpolateRow = InterpolateRow_Any_AVX2;
michael@0 210 if (IS_ALIGNED(clip_src_width, 32)) {
michael@0 211 InterpolateRow = InterpolateRow_AVX2;
michael@0 212 }
michael@0 213 }
michael@0 214 #endif
michael@0 215 #if defined(HAS_INTERPOLATEROW_NEON)
michael@0 216 if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) {
michael@0 217 InterpolateRow = InterpolateRow_Any_NEON;
michael@0 218 if (IS_ALIGNED(clip_src_width, 16)) {
michael@0 219 InterpolateRow = InterpolateRow_NEON;
michael@0 220 }
michael@0 221 }
michael@0 222 #endif
michael@0 223 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
michael@0 224 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 &&
michael@0 225 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
michael@0 226 InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
michael@0 227 if (IS_ALIGNED(clip_src_width, 4)) {
michael@0 228 InterpolateRow = InterpolateRow_MIPS_DSPR2;
michael@0 229 }
michael@0 230 }
michael@0 231 #endif
michael@0 232 void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
michael@0 233 int dst_width, int x, int dx) =
michael@0 234 (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
michael@0 235 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
michael@0 236 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
michael@0 237 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
michael@0 238 }
michael@0 239 #endif
michael@0 240 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
michael@0 241 // Allocate a row of ARGB.
michael@0 242 align_buffer_64(row, clip_src_width * 4);
michael@0 243
michael@0 244 const int max_y = (src_height - 1) << 16;
michael@0 245 for (j = 0; j < dst_height; ++j) {
michael@0 246 if (y > max_y) {
michael@0 247 y = max_y;
michael@0 248 }
michael@0 249 int yi = y >> 16;
michael@0 250 const uint8* src = src_argb + yi * src_stride;
michael@0 251 if (filtering == kFilterLinear) {
michael@0 252 ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
michael@0 253 } else {
michael@0 254 int yf = (y >> 8) & 255;
michael@0 255 InterpolateRow(row, src, src_stride, clip_src_width, yf);
michael@0 256 ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
michael@0 257 }
michael@0 258 dst_argb += dst_stride;
michael@0 259 y += dy;
michael@0 260 }
michael@0 261 free_aligned_buffer_64(row);
michael@0 262 }
michael@0 263
michael@0 264 // Scale ARGB up with bilinear interpolation.
michael@0 265 static void ScaleARGBBilinearUp(int src_width, int src_height,
michael@0 266 int dst_width, int dst_height,
michael@0 267 int src_stride, int dst_stride,
michael@0 268 const uint8* src_argb, uint8* dst_argb,
michael@0 269 int x, int dx, int y, int dy,
michael@0 270 enum FilterMode filtering) {
michael@0 271 int j;
michael@0 272 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
michael@0 273 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
michael@0 274 InterpolateRow_C;
michael@0 275 void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
michael@0 276 int dst_width, int x, int dx) =
michael@0 277 filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
michael@0 278 #if defined(HAS_INTERPOLATEROW_SSE2)
michael@0 279 if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
michael@0 280 InterpolateRow = InterpolateRow_Any_SSE2;
michael@0 281 if (IS_ALIGNED(dst_width, 4)) {
michael@0 282 InterpolateRow = InterpolateRow_Unaligned_SSE2;
michael@0 283 if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
michael@0 284 InterpolateRow = InterpolateRow_SSE2;
michael@0 285 }
michael@0 286 }
michael@0 287 }
michael@0 288 #endif
michael@0 289 #if defined(HAS_INTERPOLATEROW_SSSE3)
michael@0 290 if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
michael@0 291 InterpolateRow = InterpolateRow_Any_SSSE3;
michael@0 292 if (IS_ALIGNED(dst_width, 4)) {
michael@0 293 InterpolateRow = InterpolateRow_Unaligned_SSSE3;
michael@0 294 if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
michael@0 295 InterpolateRow = InterpolateRow_SSSE3;
michael@0 296 }
michael@0 297 }
michael@0 298 }
michael@0 299 #endif
michael@0 300 #if defined(HAS_INTERPOLATEROW_AVX2)
michael@0 301 if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
michael@0 302 InterpolateRow = InterpolateRow_Any_AVX2;
michael@0 303 if (IS_ALIGNED(dst_width, 8)) {
michael@0 304 InterpolateRow = InterpolateRow_AVX2;
michael@0 305 }
michael@0 306 }
michael@0 307 #endif
michael@0 308 #if defined(HAS_INTERPOLATEROW_NEON)
michael@0 309 if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
michael@0 310 InterpolateRow = InterpolateRow_Any_NEON;
michael@0 311 if (IS_ALIGNED(dst_width, 4)) {
michael@0 312 InterpolateRow = InterpolateRow_NEON;
michael@0 313 }
michael@0 314 }
michael@0 315 #endif
michael@0 316 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
michael@0 317 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
michael@0 318 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
michael@0 319 InterpolateRow = InterpolateRow_MIPS_DSPR2;
michael@0 320 }
michael@0 321 #endif
michael@0 322 if (src_width >= 32768) {
michael@0 323 ScaleARGBFilterCols = filtering ?
michael@0 324 ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
michael@0 325 }
michael@0 326 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
michael@0 327 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
michael@0 328 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
michael@0 329 }
michael@0 330 #endif
michael@0 331 #if defined(HAS_SCALEARGBCOLS_SSE2)
michael@0 332 if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
michael@0 333 ScaleARGBFilterCols = ScaleARGBCols_SSE2;
michael@0 334 }
michael@0 335 #endif
michael@0 336 if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
michael@0 337 ScaleARGBFilterCols = ScaleARGBColsUp2_C;
michael@0 338 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
michael@0 339 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
michael@0 340 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
michael@0 341 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
michael@0 342 ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
michael@0 343 }
michael@0 344 #endif
michael@0 345 }
michael@0 346
michael@0 347 const int max_y = (src_height - 1) << 16;
michael@0 348 if (y > max_y) {
michael@0 349 y = max_y;
michael@0 350 }
michael@0 351 int yi = y >> 16;
michael@0 352 const uint8* src = src_argb + yi * src_stride;
michael@0 353
michael@0 354 // Allocate 2 rows of ARGB.
michael@0 355 const int kRowSize = (dst_width * 4 + 15) & ~15;
michael@0 356 align_buffer_64(row, kRowSize * 2);
michael@0 357
michael@0 358 uint8* rowptr = row;
michael@0 359 int rowstride = kRowSize;
michael@0 360 int lasty = yi;
michael@0 361
michael@0 362 ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
michael@0 363 if (src_height > 1) {
michael@0 364 src += src_stride;
michael@0 365 }
michael@0 366 ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
michael@0 367 src += src_stride;
michael@0 368
michael@0 369 for (j = 0; j < dst_height; ++j) {
michael@0 370 yi = y >> 16;
michael@0 371 if (yi != lasty) {
michael@0 372 if (y > max_y) {
michael@0 373 y = max_y;
michael@0 374 yi = y >> 16;
michael@0 375 src = src_argb + yi * src_stride;
michael@0 376 }
michael@0 377 if (yi != lasty) {
michael@0 378 ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
michael@0 379 rowptr += rowstride;
michael@0 380 rowstride = -rowstride;
michael@0 381 lasty = yi;
michael@0 382 src += src_stride;
michael@0 383 }
michael@0 384 }
michael@0 385 if (filtering == kFilterLinear) {
michael@0 386 InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
michael@0 387 } else {
michael@0 388 int yf = (y >> 8) & 255;
michael@0 389 InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
michael@0 390 }
michael@0 391 dst_argb += dst_stride;
michael@0 392 y += dy;
michael@0 393 }
michael@0 394 free_aligned_buffer_64(row);
michael@0 395 }
michael@0 396
#ifdef YUVSCALEUP
// Scale YUV to ARGB up with bilinear interpolation.
// Each needed source row is first converted to ARGB with I422ToARGBRow into a
// one-row buffer, then scaled horizontally into one of two scratch rows. Every
// destination row is produced by passing the scratch pair to InterpolateRow
// with the fractional part of |y|.
// The U and V planes advance one row for every two Y rows (kYShift == 1).
// |x|, |dx|, |y| and |dy| are 16.16 fixed point.
//
// Fixes relative to the previous revision of this (normally disabled) code:
//  - The conversion buffer is released under the name it was allocated with
//    (argb_row); the old code freed a non-existent row_argb.
//  - The 2x column kernel check no longer references src_argb, src_stride and
//    dst_stride, which do not exist in this function. It now tests the
//    buffers the column kernel actually reads (argb_row) and writes (row).
//  - The two priming rows are converted to ARGB before being scaled. They
//    used to hand raw Y plane bytes to the ARGB column scaler.
static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
                                     int dst_width, int dst_height,
                                     int src_stride_y,
                                     int src_stride_u,
                                     int src_stride_v,
                                     int dst_stride_argb,
                                     const uint8* src_y,
                                     const uint8* src_u,
                                     const uint8* src_v,
                                     uint8* dst_argb,
                                     int x, int dx, int y, int dy,
                                     enum FilterMode filtering) {
  int j;
  // YUV to ARGB row converter. The checks run in order and a later match
  // overrides an earlier one.
  void (*I422ToARGBRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) {
    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        I422ToARGBRow = I422ToARGBRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) {
    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 16)) {
      I422ToARGBRow = I422ToARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) {
    I422ToARGBRow = I422ToARGBRow_Any_NEON;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_NEON;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
  }
#endif

  // Vertical blend kernel, selected the same way.
  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        InterpolateRow = InterpolateRow_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        InterpolateRow = InterpolateRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
    InterpolateRow = InterpolateRow_MIPS_DSPR2;
  }
#endif

  // Column scaler. Sources at least 32768 pixels wide use the 64 bit C
  // variants and none of the SIMD ones.
  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
      int dst_width, int x, int dx) =
      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
  if (src_width >= 32768) {
    ScaleARGBFilterCols = filtering ?
        ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
  }
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
  }
#endif

  // Never start from a row past the last source row.
  const int max_y = (src_height - 1) << 16;
  if (y > max_y) {
    y = max_y;
  }
  const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
  int yi = y >> 16;
  int uv_yi = yi >> kYShift;
  const uint8* src_row_y = src_y + yi * src_stride_y;
  const uint8* src_row_u = src_u + uv_yi * src_stride_u;
  const uint8* src_row_v = src_v + uv_yi * src_stride_v;

  // Allocate 2 rows of ARGB, each padded to a multiple of 16 bytes.
  const int kRowSize = (dst_width * 4 + 15) & ~15;
  align_buffer_64(row, kRowSize * 2);

  // Allocate 1 row of ARGB for source conversion.
  align_buffer_64(argb_row, src_width * 4);

  // Unfiltered exact 2x upscale starting in the first half pixel. This is
  // selected after the buffers exist so that the SSE2 variant can be gated on
  // the memory it really touches: it reads argb_row and writes into row
  // (kRowSize is a multiple of 16, so both scratch rows share row's alignment).
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
        IS_ALIGNED(argb_row, 16) && IS_ALIGNED(row, 16)) {
      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
    }
#endif
  }

  // rowptr holds the scaled row for lasty and rowptr + rowstride holds the
  // following one. rowstride flips sign each time the rows swap roles.
  uint8* rowptr = row;
  int rowstride = kRowSize;
  int lasty = yi;

  // Prime the buffer: convert and scale the first two source rows.
  I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
  ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
  if (src_height > 1) {
    src_row_y += src_stride_y;
    // Chroma advances only when leaving an odd Y row.
    if (yi & 1) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }
  I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
  ScaleARGBFilterCols(rowptr + rowstride, argb_row, dst_width, x, dx);
  if (src_height > 2) {
    src_row_y += src_stride_y;
    // The second primed row is yi + 1, which is odd exactly when yi is even.
    if (!(yi & 1)) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }

  for (j = 0; j < dst_height; ++j) {
    yi = y >> 16;
    if (yi != lasty) {
      if (y > max_y) {
        // Clamp to the last source row and re-derive all three row pointers.
        y = max_y;
        yi = y >> 16;
        uv_yi = yi >> kYShift;
        src_row_y = src_y + yi * src_stride_y;
        src_row_u = src_u + uv_yi * src_stride_u;
        src_row_v = src_v + uv_yi * src_stride_v;
      }
      if (yi != lasty) {
        // TODO(fbarchard): Convert the clipped region of row.
        I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
        // Overwrite the stale row, then swap the roles of the two rows.
        ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
        rowptr += rowstride;
        rowstride = -rowstride;
        lasty = yi;
        src_row_y += src_stride_y;
        if (yi & 1) {
          src_row_u += src_stride_u;
          src_row_v += src_stride_v;
        }
      }
    }
    if (filtering == kFilterLinear) {
      // No vertical blend: stride 0 and fraction 0.
      InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
    } else {
      // Blend using the top 8 bits of the fractional part of y.
      int yf = (y >> 8) & 255;
      InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
    }
    dst_argb += dst_stride_argb;
    y += dy;
  }
  free_aligned_buffer_64(row);
  free_aligned_buffer_64(argb_row);
}
#endif
michael@0 608
michael@0 609 // Scale ARGB to/from any dimensions, without interpolation.
michael@0 610 // Fixed point math is used for performance: The upper 16 bits
michael@0 611 // of x and dx is the integer part of the source position and
michael@0 612 // the lower 16 bits are the fixed decimal part.
michael@0 613
michael@0 614 static void ScaleARGBSimple(int src_width, int src_height,
michael@0 615 int dst_width, int dst_height,
michael@0 616 int src_stride, int dst_stride,
michael@0 617 const uint8* src_argb, uint8* dst_argb,
michael@0 618 int x, int dx, int y, int dy) {
michael@0 619 int j;
michael@0 620 void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
michael@0 621 int dst_width, int x, int dx) =
michael@0 622 (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
michael@0 623 #if defined(HAS_SCALEARGBCOLS_SSE2)
michael@0 624 if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
michael@0 625 ScaleARGBCols = ScaleARGBCols_SSE2;
michael@0 626 }
michael@0 627 #endif
michael@0 628 if (src_width * 2 == dst_width && x < 0x8000) {
michael@0 629 ScaleARGBCols = ScaleARGBColsUp2_C;
michael@0 630 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
michael@0 631 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
michael@0 632 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
michael@0 633 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
michael@0 634 ScaleARGBCols = ScaleARGBColsUp2_SSE2;
michael@0 635 }
michael@0 636 #endif
michael@0 637 }
michael@0 638
michael@0 639 for (j = 0; j < dst_height; ++j) {
michael@0 640 ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
michael@0 641 dst_width, x, dx);
michael@0 642 dst_argb += dst_stride;
michael@0 643 y += dy;
michael@0 644 }
michael@0 645 }
michael@0 646
michael@0 647 // ScaleARGB a ARGB.
michael@0 648 // This function in turn calls a scaling function
michael@0 649 // suitable for handling the desired resolutions.
michael@0 650 static void ScaleARGB(const uint8* src, int src_stride,
michael@0 651 int src_width, int src_height,
michael@0 652 uint8* dst, int dst_stride,
michael@0 653 int dst_width, int dst_height,
michael@0 654 int clip_x, int clip_y, int clip_width, int clip_height,
michael@0 655 enum FilterMode filtering) {
michael@0 656 // Initial source x/y coordinate and step values as 16.16 fixed point.
michael@0 657 int x = 0;
michael@0 658 int y = 0;
michael@0 659 int dx = 0;
michael@0 660 int dy = 0;
michael@0 661 // ARGB does not support box filter yet, but allow the user to pass it.
michael@0 662 // Simplify filtering when possible.
michael@0 663 filtering = ScaleFilterReduce(src_width, src_height,
michael@0 664 dst_width, dst_height,
michael@0 665 filtering);
michael@0 666
michael@0 667 // Negative src_height means invert the image.
michael@0 668 if (src_height < 0) {
michael@0 669 src_height = -src_height;
michael@0 670 src = src + (src_height - 1) * src_stride;
michael@0 671 src_stride = -src_stride;
michael@0 672 }
michael@0 673 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
michael@0 674 &x, &y, &dx, &dy);
michael@0 675 src_width = Abs(src_width);
michael@0 676 if (clip_x) {
michael@0 677 int64 clipf = (int64)(clip_x) * dx;
michael@0 678 x += (clipf & 0xffff);
michael@0 679 src += (clipf >> 16) * 4;
michael@0 680 dst += clip_x * 4;
michael@0 681 }
michael@0 682 if (clip_y) {
michael@0 683 int64 clipf = (int64)(clip_y) * dy;
michael@0 684 y += (clipf & 0xffff);
michael@0 685 src += (clipf >> 16) * src_stride;
michael@0 686 dst += clip_y * dst_stride;
michael@0 687 }
michael@0 688
michael@0 689 // Special case for integer step values.
michael@0 690 if (((dx | dy) & 0xffff) == 0) {
michael@0 691 if (!dx || !dy) { // 1 pixel wide and/or tall.
michael@0 692 filtering = kFilterNone;
michael@0 693 } else {
michael@0 694 // Optimized even scale down. ie 2, 4, 6, 8, 10x.
michael@0 695 if (!(dx & 0x10000) && !(dy & 0x10000)) {
michael@0 696 if (dx == 0x20000) {
michael@0 697 // Optimized 1/2 downsample.
michael@0 698 ScaleARGBDown2(src_width, src_height,
michael@0 699 clip_width, clip_height,
michael@0 700 src_stride, dst_stride, src, dst,
michael@0 701 x, dx, y, dy, filtering);
michael@0 702 return;
michael@0 703 }
michael@0 704 if (dx == 0x40000 && filtering == kFilterBox) {
michael@0 705 // Optimized 1/4 box downsample.
michael@0 706 ScaleARGBDown4Box(src_width, src_height,
michael@0 707 clip_width, clip_height,
michael@0 708 src_stride, dst_stride, src, dst,
michael@0 709 x, dx, y, dy);
michael@0 710 return;
michael@0 711 }
michael@0 712 ScaleARGBDownEven(src_width, src_height,
michael@0 713 clip_width, clip_height,
michael@0 714 src_stride, dst_stride, src, dst,
michael@0 715 x, dx, y, dy, filtering);
michael@0 716 return;
michael@0 717 }
michael@0 718 // Optimized odd scale down. ie 3, 5, 7, 9x.
michael@0 719 if ((dx & 0x10000) && (dy & 0x10000)) {
michael@0 720 filtering = kFilterNone;
michael@0 721 if (dx == 0x10000 && dy == 0x10000) {
michael@0 722 // Straight copy.
michael@0 723 ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
michael@0 724 dst, dst_stride, clip_width, clip_height);
michael@0 725 return;
michael@0 726 }
michael@0 727 }
michael@0 728 }
michael@0 729 }
michael@0 730 if (dx == 0x10000 && (x & 0xffff) == 0) {
michael@0 731 // Arbitrary scale vertically, but unscaled vertically.
michael@0 732 ScalePlaneVertical(src_height,
michael@0 733 clip_width, clip_height,
michael@0 734 src_stride, dst_stride, src, dst,
michael@0 735 x, y, dy, 4, filtering);
michael@0 736 return;
michael@0 737 }
michael@0 738 if (filtering && dy < 65536) {
michael@0 739 ScaleARGBBilinearUp(src_width, src_height,
michael@0 740 clip_width, clip_height,
michael@0 741 src_stride, dst_stride, src, dst,
michael@0 742 x, dx, y, dy, filtering);
michael@0 743 return;
michael@0 744 }
michael@0 745 if (filtering) {
michael@0 746 ScaleARGBBilinearDown(src_width, src_height,
michael@0 747 clip_width, clip_height,
michael@0 748 src_stride, dst_stride, src, dst,
michael@0 749 x, dx, y, dy, filtering);
michael@0 750 return;
michael@0 751 }
michael@0 752 ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
michael@0 753 src_stride, dst_stride, src, dst,
michael@0 754 x, dx, y, dy);
michael@0 755 }
michael@0 756
michael@0 757 LIBYUV_API
michael@0 758 int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
michael@0 759 int src_width, int src_height,
michael@0 760 uint8* dst_argb, int dst_stride_argb,
michael@0 761 int dst_width, int dst_height,
michael@0 762 int clip_x, int clip_y, int clip_width, int clip_height,
michael@0 763 enum FilterMode filtering) {
michael@0 764 if (!src_argb || src_width == 0 || src_height == 0 ||
michael@0 765 !dst_argb || dst_width <= 0 || dst_height <= 0 ||
michael@0 766 clip_x < 0 || clip_y < 0 ||
michael@0 767 (clip_x + clip_width) > dst_width ||
michael@0 768 (clip_y + clip_height) > dst_height) {
michael@0 769 return -1;
michael@0 770 }
michael@0 771 ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
michael@0 772 dst_argb, dst_stride_argb, dst_width, dst_height,
michael@0 773 clip_x, clip_y, clip_width, clip_height, filtering);
michael@0 774 return 0;
michael@0 775 }
michael@0 776
michael@0 777 // Scale an ARGB image.
michael@0 778 LIBYUV_API
michael@0 779 int ARGBScale(const uint8* src_argb, int src_stride_argb,
michael@0 780 int src_width, int src_height,
michael@0 781 uint8* dst_argb, int dst_stride_argb,
michael@0 782 int dst_width, int dst_height,
michael@0 783 enum FilterMode filtering) {
michael@0 784 if (!src_argb || src_width == 0 || src_height == 0 ||
michael@0 785 !dst_argb || dst_width <= 0 || dst_height <= 0) {
michael@0 786 return -1;
michael@0 787 }
michael@0 788 ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
michael@0 789 dst_argb, dst_stride_argb, dst_width, dst_height,
michael@0 790 0, 0, dst_width, dst_height, filtering);
michael@0 791 return 0;
michael@0 792 }
michael@0 793
michael@0 794 #ifdef __cplusplus
michael@0 795 } // extern "C"
michael@0 796 } // namespace libyuv
michael@0 797 #endif

mercurial