media/libyuv/source/scale_argb.cc

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /*
     2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
     3  *
     4  *  Use of this source code is governed by a BSD-style license
     5  *  that can be found in the LICENSE file in the root of the source
     6  *  tree. An additional intellectual property rights grant can be found
     7  *  in the file PATENTS. All contributing project authors may
     8  *  be found in the AUTHORS file in the root of the source tree.
     9  */
    11 #include "libyuv/scale.h"
    13 #include <assert.h>
    14 #include <string.h>
    16 #include "libyuv/cpu_id.h"
    17 #include "libyuv/planar_functions.h"  // For CopyARGB
    18 #include "libyuv/row.h"
    19 #include "libyuv/scale_row.h"
    21 #ifdef __cplusplus
    22 namespace libyuv {
    23 extern "C" {
    24 #endif
    // Returns v unchanged when it is non-negative, otherwise its negation.
    static __inline int Abs(int v) {
      if (v < 0) {
        return -v;
      }
      return v;
    }
    30 // ScaleARGB ARGB, 1/2
    31 // This is an optimized version for scaling down a ARGB to 1/2 of
    32 // its original size.
    33 static void ScaleARGBDown2(int src_width, int src_height,
    34                            int dst_width, int dst_height,
    35                            int src_stride, int dst_stride,
    36                            const uint8* src_argb, uint8* dst_argb,
    37                            int x, int dx, int y, int dy,
    38                            enum FilterMode filtering) {
    39   int j;
    40   int row_stride = src_stride * (dy >> 16);
    41   void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
    42                             uint8* dst_argb, int dst_width) =
    43     filtering == kFilterNone ? ScaleARGBRowDown2_C :
    44         (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
    45         ScaleARGBRowDown2Box_C);
    46   assert(dx == 65536 * 2);  // Test scale factor of 2.
    47   assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
    48   // Advance to odd row, even column.
    49   if (filtering == kFilterBilinear) {
    50     src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
    51   } else {
    52     src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
    53   }
    55 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
    56   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
    57       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
    58       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
    59     ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
    60         (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
    61         ScaleARGBRowDown2Box_SSE2);
    62   }
    63 #elif defined(HAS_SCALEARGBROWDOWN2_NEON)
    64   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
    65       IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
    66     ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON :
    67         ScaleARGBRowDown2_NEON;
    68   }
    69 #endif
    71   if (filtering == kFilterLinear) {
    72     src_stride = 0;
    73   }
    74   for (j = 0; j < dst_height; ++j) {
    75     ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
    76     src_argb += row_stride;
    77     dst_argb += dst_stride;
    78   }
    79 }
    81 // ScaleARGB ARGB, 1/4
    82 // This is an optimized version for scaling down a ARGB to 1/4 of
    83 // its original size.
    84 static void ScaleARGBDown4Box(int src_width, int src_height,
    85                               int dst_width, int dst_height,
    86                               int src_stride, int dst_stride,
    87                               const uint8* src_argb, uint8* dst_argb,
    88                               int x, int dx, int y, int dy) {
    89   int j;
    90   // Allocate 2 rows of ARGB.
    91   const int kRowSize = (dst_width * 2 * 4 + 15) & ~15;
    92   align_buffer_64(row, kRowSize * 2);
    93   int row_stride = src_stride * (dy >> 16);
    94   void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
    95     uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C;
    96   // Advance to odd row, even column.
    97   src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
    98   assert(dx == 65536 * 4);  // Test scale factor of 4.
    99   assert((dy & 0x3ffff) == 0);  // Test vertical scale is multiple of 4.
   100 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
   101   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
   102       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
   103       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
   104     ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
   105   }
   106 #elif defined(HAS_SCALEARGBROWDOWN2_NEON)
   107   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
   108       IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
   109     ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
   110   }
   111 #endif
   112   for (j = 0; j < dst_height; ++j) {
   113     ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
   114     ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
   115                       row + kRowSize, dst_width * 2);
   116     ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
   117     src_argb += row_stride;
   118     dst_argb += dst_stride;
   119   }
   120   free_aligned_buffer_64(row);
   121 }
   123 // ScaleARGB ARGB Even
   124 // This is an optimized version for scaling down a ARGB to even
   125 // multiple of its original size.
   126 static void ScaleARGBDownEven(int src_width, int src_height,
   127                               int dst_width, int dst_height,
   128                               int src_stride, int dst_stride,
   129                               const uint8* src_argb, uint8* dst_argb,
   130                               int x, int dx, int y, int dy,
   131                               enum FilterMode filtering) {
   132   int j;
   133   int col_step = dx >> 16;
   134   int row_stride = (dy >> 16) * src_stride;
   135   void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
   136                                int src_step, uint8* dst_argb, int dst_width) =
   137       filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
   138   assert(IS_ALIGNED(src_width, 2));
   139   assert(IS_ALIGNED(src_height, 2));
   140   src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
   141 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
   142   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
   143       IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
   144     ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
   145         ScaleARGBRowDownEven_SSE2;
   146   }
   147 #elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
   148   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) &&
   149       IS_ALIGNED(src_argb, 4)) {
   150     ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
   151         ScaleARGBRowDownEven_NEON;
   152   }
   153 #endif
   155   if (filtering == kFilterLinear) {
   156     src_stride = 0;
   157   }
   158   for (j = 0; j < dst_height; ++j) {
   159     ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
   160     src_argb += row_stride;
   161     dst_argb += dst_stride;
   162   }
   163 }
   165 // Scale ARGB down with bilinear interpolation.
   166 static void ScaleARGBBilinearDown(int src_width, int src_height,
   167                                   int dst_width, int dst_height,
   168                                   int src_stride, int dst_stride,
   169                                   const uint8* src_argb, uint8* dst_argb,
   170                                   int x, int dx, int y, int dy,
   171                                   enum FilterMode filtering) {
   172   int j;
   173   int64 xlast = x + (int64)(dst_width - 1) * dx;
   174   int64 xl = (dx >= 0) ? x : xlast;
   175   int64 xr = (dx >= 0) ? xlast : x;
   176   int clip_src_width;
   177   xl = (xl >> 16) & ~3;  // Left edge aligned.
   178   xr = (xr >> 16) + 1;  // Right most pixel used.
   179   clip_src_width = (((xr - xl) + 1 + 3) & ~3) * 4;  // Width aligned to 4.
   180   src_argb += xl * 4;
   181   x -= (int)(xl << 16);
   182   void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
   183       ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
   184       InterpolateRow_C;
   185 #if defined(HAS_INTERPOLATEROW_SSE2)
   186   if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) {
   187     InterpolateRow = InterpolateRow_Any_SSE2;
   188     if (IS_ALIGNED(clip_src_width, 16)) {
   189       InterpolateRow = InterpolateRow_Unaligned_SSE2;
   190       if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
   191         InterpolateRow = InterpolateRow_SSE2;
   192       }
   193     }
   194   }
   195 #endif
   196 #if defined(HAS_INTERPOLATEROW_SSSE3)
   197   if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) {
   198     InterpolateRow = InterpolateRow_Any_SSSE3;
   199     if (IS_ALIGNED(clip_src_width, 16)) {
   200       InterpolateRow = InterpolateRow_Unaligned_SSSE3;
   201       if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
   202         InterpolateRow = InterpolateRow_SSSE3;
   203       }
   204     }
   205   }
   206 #endif
   207 #if defined(HAS_INTERPOLATEROW_AVX2)
   208   if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) {
   209     InterpolateRow = InterpolateRow_Any_AVX2;
   210     if (IS_ALIGNED(clip_src_width, 32)) {
   211       InterpolateRow = InterpolateRow_AVX2;
   212     }
   213   }
   214 #endif
   215 #if defined(HAS_INTERPOLATEROW_NEON)
   216   if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) {
   217     InterpolateRow = InterpolateRow_Any_NEON;
   218     if (IS_ALIGNED(clip_src_width, 16)) {
   219       InterpolateRow = InterpolateRow_NEON;
   220     }
   221   }
   222 #endif
   223 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
   224   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 &&
   225       IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
   226     InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
   227     if (IS_ALIGNED(clip_src_width, 4)) {
   228       InterpolateRow = InterpolateRow_MIPS_DSPR2;
   229     }
   230   }
   231 #endif
   232   void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
   233       int dst_width, int x, int dx) =
   234       (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
   235 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
   236   if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
   237     ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
   238   }
   239 #endif
   240   // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
   241   // Allocate a row of ARGB.
   242   align_buffer_64(row, clip_src_width * 4);
   244   const int max_y = (src_height - 1) << 16;
   245   for (j = 0; j < dst_height; ++j) {
   246     if (y > max_y) {
   247       y = max_y;
   248     }
   249     int yi = y >> 16;
   250     const uint8* src = src_argb + yi * src_stride;
   251     if (filtering == kFilterLinear) {
   252       ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
   253     } else {
   254       int yf = (y >> 8) & 255;
   255       InterpolateRow(row, src, src_stride, clip_src_width, yf);
   256       ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
   257     }
   258     dst_argb += dst_stride;
   259     y += dy;
   260   }
   261   free_aligned_buffer_64(row);
   262 }
   264 // Scale ARGB up with bilinear interpolation.
   265 static void ScaleARGBBilinearUp(int src_width, int src_height,
   266                                 int dst_width, int dst_height,
   267                                 int src_stride, int dst_stride,
   268                                 const uint8* src_argb, uint8* dst_argb,
   269                                 int x, int dx, int y, int dy,
   270                                 enum FilterMode filtering) {
   271   int j;
   272   void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
   273       ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
   274       InterpolateRow_C;
   275   void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
   276       int dst_width, int x, int dx) =
   277       filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
   278 #if defined(HAS_INTERPOLATEROW_SSE2)
   279   if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
   280     InterpolateRow = InterpolateRow_Any_SSE2;
   281     if (IS_ALIGNED(dst_width, 4)) {
   282       InterpolateRow = InterpolateRow_Unaligned_SSE2;
   283       if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
   284         InterpolateRow = InterpolateRow_SSE2;
   285       }
   286     }
   287   }
   288 #endif
   289 #if defined(HAS_INTERPOLATEROW_SSSE3)
   290   if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
   291     InterpolateRow = InterpolateRow_Any_SSSE3;
   292     if (IS_ALIGNED(dst_width, 4)) {
   293       InterpolateRow = InterpolateRow_Unaligned_SSSE3;
   294       if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
   295         InterpolateRow = InterpolateRow_SSSE3;
   296       }
   297     }
   298   }
   299 #endif
   300 #if defined(HAS_INTERPOLATEROW_AVX2)
   301   if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
   302     InterpolateRow = InterpolateRow_Any_AVX2;
   303     if (IS_ALIGNED(dst_width, 8)) {
   304       InterpolateRow = InterpolateRow_AVX2;
   305     }
   306   }
   307 #endif
   308 #if defined(HAS_INTERPOLATEROW_NEON)
   309   if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
   310     InterpolateRow = InterpolateRow_Any_NEON;
   311     if (IS_ALIGNED(dst_width, 4)) {
   312       InterpolateRow = InterpolateRow_NEON;
   313     }
   314   }
   315 #endif
   316 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
   317   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
   318       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
   319     InterpolateRow = InterpolateRow_MIPS_DSPR2;
   320   }
   321 #endif
   322   if (src_width >= 32768) {
   323     ScaleARGBFilterCols = filtering ?
   324         ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
   325   }
   326 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
   327   if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
   328     ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
   329   }
   330 #endif
   331 #if defined(HAS_SCALEARGBCOLS_SSE2)
   332   if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
   333     ScaleARGBFilterCols = ScaleARGBCols_SSE2;
   334   }
   335 #endif
   336   if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
   337     ScaleARGBFilterCols = ScaleARGBColsUp2_C;
   338 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
   339     if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
   340         IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
   341         IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
   342       ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
   343     }
   344 #endif
   345   }
   347   const int max_y = (src_height - 1) << 16;
   348   if (y > max_y) {
   349     y = max_y;
   350   }
   351   int yi = y >> 16;
   352   const uint8* src = src_argb + yi * src_stride;
   354   // Allocate 2 rows of ARGB.
   355   const int kRowSize = (dst_width * 4 + 15) & ~15;
   356   align_buffer_64(row, kRowSize * 2);
   358   uint8* rowptr = row;
   359   int rowstride = kRowSize;
   360   int lasty = yi;
   362   ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
   363   if (src_height > 1) {
   364     src += src_stride;
   365   }
   366   ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
   367   src += src_stride;
   369   for (j = 0; j < dst_height; ++j) {
   370     yi = y >> 16;
   371     if (yi != lasty) {
   372       if (y > max_y) {
   373         y = max_y;
   374         yi = y >> 16;
   375         src = src_argb + yi * src_stride;
   376       }
   377       if (yi != lasty) {
   378         ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
   379         rowptr += rowstride;
   380         rowstride = -rowstride;
   381         lasty = yi;
   382         src += src_stride;
   383       }
   384     }
   385     if (filtering == kFilterLinear) {
   386       InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
   387     } else {
   388       int yf = (y >> 8) & 255;
   389       InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
   390     }
   391     dst_argb += dst_stride;
   392     y += dy;
   393   }
   394   free_aligned_buffer_64(row);
   395 }
   #ifdef YUVSCALEUP
   // Scale YUV to ARGB up with bilinear interpolation.
   //
   // Each needed source row is converted from planar Y/U/V to ARGB into a
   // one-row buffer, scaled horizontally into a two-row scratch buffer, and
   // output rows are produced by blending the two scratch rows vertically.
   // The chroma row used for luma row r is (r >> kYShift), i.e. one chroma row
   // per two luma rows.
   //
   // x/dx and y/dy are 16.16 fixed point start positions and steps.
   static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
                                        int dst_width, int dst_height,
                                        int src_stride_y,
                                        int src_stride_u,
                                        int src_stride_v,
                                        int dst_stride_argb,
                                        const uint8* src_y,
                                        const uint8* src_u,
                                        const uint8* src_v,
                                        uint8* dst_argb,
                                        int x, int dx, int y, int dy,
                                        enum FilterMode filtering) {
     int j;
     void (*I422ToARGBRow)(const uint8* y_buf,
                           const uint8* u_buf,
                           const uint8* v_buf,
                           uint8* rgb_buf,
                           int width) = I422ToARGBRow_C;
   #if defined(HAS_I422TOARGBROW_SSSE3)
     if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) {
       I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
       if (IS_ALIGNED(src_width, 8)) {
         I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
         if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
           I422ToARGBRow = I422ToARGBRow_SSSE3;
         }
       }
     }
   #endif
   #if defined(HAS_I422TOARGBROW_AVX2)
     if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) {
       I422ToARGBRow = I422ToARGBRow_Any_AVX2;
       if (IS_ALIGNED(src_width, 16)) {
         I422ToARGBRow = I422ToARGBRow_AVX2;
       }
     }
   #endif
   #if defined(HAS_I422TOARGBROW_NEON)
     if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) {
       I422ToARGBRow = I422ToARGBRow_Any_NEON;
       if (IS_ALIGNED(src_width, 8)) {
         I422ToARGBRow = I422ToARGBRow_NEON;
       }
     }
   #endif
   #if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
     if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) &&
         IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
         IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
         IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
         IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
       I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
     }
   #endif

     void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
         ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
         InterpolateRow_C;
   #if defined(HAS_INTERPOLATEROW_SSE2)
     if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
       InterpolateRow = InterpolateRow_Any_SSE2;
       if (IS_ALIGNED(dst_width, 4)) {
         InterpolateRow = InterpolateRow_Unaligned_SSE2;
         if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
           InterpolateRow = InterpolateRow_SSE2;
         }
       }
     }
   #endif
   #if defined(HAS_INTERPOLATEROW_SSSE3)
     if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
       InterpolateRow = InterpolateRow_Any_SSSE3;
       if (IS_ALIGNED(dst_width, 4)) {
         InterpolateRow = InterpolateRow_Unaligned_SSSE3;
         if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
           InterpolateRow = InterpolateRow_SSSE3;
         }
       }
     }
   #endif
   #if defined(HAS_INTERPOLATEROW_AVX2)
     if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
       InterpolateRow = InterpolateRow_Any_AVX2;
       if (IS_ALIGNED(dst_width, 8)) {
         InterpolateRow = InterpolateRow_AVX2;
       }
     }
   #endif
   #if defined(HAS_INTERPOLATEROW_NEON)
     if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
       InterpolateRow = InterpolateRow_Any_NEON;
       if (IS_ALIGNED(dst_width, 4)) {
         InterpolateRow = InterpolateRow_NEON;
       }
     }
   #endif
   #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
     if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
         IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
       InterpolateRow = InterpolateRow_MIPS_DSPR2;
     }
   #endif

     void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
         int dst_width, int x, int dx) =
         filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
     if (src_width >= 32768) {
       ScaleARGBFilterCols = filtering ?
           ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
     }
   #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
     if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
       ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
     }
   #endif
   #if defined(HAS_SCALEARGBCOLS_SSE2)
     if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
       ScaleARGBFilterCols = ScaleARGBCols_SSE2;
     }
   #endif

     const int max_y = (src_height - 1) << 16;
     if (y > max_y) {
       y = max_y;
     }
     const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
     int yi = y >> 16;
     int uv_yi;

     // Allocate 2 rows of ARGB.
     const int kRowSize = (dst_width * 4 + 15) & ~15;
     align_buffer_64(row, kRowSize * 2);

     // Allocate 1 row of ARGB for source conversion.
     align_buffer_64(argb_row, src_width * 4);

     // Unfiltered exact 2x widening has a dedicated column function. The
     // column scaler reads argb_row and writes the scratch rows, so those are
     // the buffers whose alignment matters for the SSE2 variant (kRowSize is a
     // multiple of 16, so both scratch rows share row's alignment).
     if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
       ScaleARGBFilterCols = ScaleARGBColsUp2_C;
   #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
       if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
           IS_ALIGNED(argb_row, 16) && IS_ALIGNED(row, 16)) {
         ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
       }
   #endif
     }

     uint8* rowptr = row;
     int rowstride = kRowSize;
     int lasty = yi;
     // Index of the next luma row to convert. Plane pointers are derived from
     // this index each time, so luma and chroma rows cannot drift apart.
     int src_yi = yi;

     // Prime the scratch buffer with source rows yi and yi + 1, converting each
     // to ARGB first. When a row does not exist the last row is used again.
     uv_yi = src_yi >> kYShift;
     I422ToARGBRow(src_y + src_yi * src_stride_y,
                   src_u + uv_yi * src_stride_u,
                   src_v + uv_yi * src_stride_v,
                   argb_row, src_width);
     ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
     if (src_yi + 1 < src_height) {
       ++src_yi;
     }
     uv_yi = src_yi >> kYShift;
     I422ToARGBRow(src_y + src_yi * src_stride_y,
                   src_u + uv_yi * src_stride_u,
                   src_v + uv_yi * src_stride_v,
                   argb_row, src_width);
     ScaleARGBFilterCols(rowptr + rowstride, argb_row, dst_width, x, dx);
     if (src_yi + 1 < src_height) {
       ++src_yi;
     }

     for (j = 0; j < dst_height; ++j) {
       yi = y >> 16;
       if (yi != lasty) {
         if (y > max_y) {
           y = max_y;
           yi = y >> 16;
           src_yi = yi;
         }
         if (yi != lasty) {
           // TODO(fbarchard): Convert the clipped region of row.
           uv_yi = src_yi >> kYShift;
           I422ToARGBRow(src_y + src_yi * src_stride_y,
                         src_u + uv_yi * src_stride_u,
                         src_v + uv_yi * src_stride_v,
                         argb_row, src_width);
           // Replace the older scratch row and swap the roles of the two rows.
           ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
           rowptr += rowstride;
           rowstride = -rowstride;
           lasty = yi;
           if (src_yi + 1 < src_height) {
             ++src_yi;
           }
         }
       }
       if (filtering == kFilterLinear) {
         InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
       } else {
         int yf = (y >> 8) & 255;  // Top 8 bits of the vertical fraction.
         InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
       }
       dst_argb += dst_stride_argb;
       y += dy;
     }
     free_aligned_buffer_64(row);
     free_aligned_buffer_64(argb_row);
   }
   #endif
   609 // Scale ARGB to/from any dimensions, without interpolation.
   610 // Fixed point math is used for performance: The upper 16 bits
   611 // of x and dx is the integer part of the source position and
   612 // the lower 16 bits are the fixed decimal part.
   614 static void ScaleARGBSimple(int src_width, int src_height,
   615                             int dst_width, int dst_height,
   616                             int src_stride, int dst_stride,
   617                             const uint8* src_argb, uint8* dst_argb,
   618                             int x, int dx, int y, int dy) {
   619   int j;
   620   void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
   621       int dst_width, int x, int dx) =
   622       (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
   623 #if defined(HAS_SCALEARGBCOLS_SSE2)
   624   if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
   625     ScaleARGBCols = ScaleARGBCols_SSE2;
   626   }
   627 #endif
   628   if (src_width * 2 == dst_width && x < 0x8000) {
   629     ScaleARGBCols = ScaleARGBColsUp2_C;
   630 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
   631     if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
   632         IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
   633         IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
   634       ScaleARGBCols = ScaleARGBColsUp2_SSE2;
   635     }
   636 #endif
   637   }
   639   for (j = 0; j < dst_height; ++j) {
   640     ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
   641                   dst_width, x, dx);
   642     dst_argb += dst_stride;
   643     y += dy;
   644   }
   645 }
   647 // Scale an ARGB image.
   648 // This function in turn calls a scaling function
   649 // suitable for handling the desired resolutions.
       //
       // src/dst are the full source and destination images. Only the
       // destination rectangle (clip_x, clip_y, clip_width, clip_height) is
       // written: the source and destination pointers are moved to the start of
       // that rectangle and the chosen scaler is asked for clip_width x
       // clip_height pixels.
       // A negative src_height means the source is read bottom-up.
   650 static void ScaleARGB(const uint8* src, int src_stride,
   651                       int src_width, int src_height,
   652                       uint8* dst, int dst_stride,
   653                       int dst_width, int dst_height,
   654                       int clip_x, int clip_y, int clip_width, int clip_height,
   655                       enum FilterMode filtering) {
   656   // Initial source x/y coordinate and step values as 16.16 fixed point.
   657   int x = 0;
   658   int y = 0;
   659   int dx = 0;
   660   int dy = 0;
   661   // ARGB does not support box filter yet, but allow the user to pass it.
   662   // Simplify filtering when possible.
   663   filtering = ScaleFilterReduce(src_width, src_height,
   664                                 dst_width, dst_height,
   665                                 filtering);
   667   // Negative src_height means invert the image.
         // Start at the last row and walk upwards using a negated stride.
   668   if (src_height < 0) {
   669     src_height = -src_height;
   670     src = src + (src_height - 1) * src_stride;
   671     src_stride = -src_stride;
   672   }
         // ScaleSlope fills in the start position (x, y) and per-pixel step
         // (dx, dy). It is given the signed src_width; the sign is dropped
         // afterwards.
   673   ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
   674              &x, &y, &dx, &dy);
   675   src_width = Abs(src_width);
         // Skip clip_x destination columns: the whole-pixel part of the matching
         // source offset moves src, the fractional part is folded into x.
   676   if (clip_x) {
   677     int64 clipf = (int64)(clip_x) * dx;
   678     x += (clipf & 0xffff);
   679     src += (clipf >> 16) * 4;
   680     dst += clip_x * 4;
   681   }
         // Same for clip_y destination rows.
   682   if (clip_y) {
   683     int64 clipf = (int64)(clip_y) * dy;
   684     y += (clipf & 0xffff);
   685     src += (clipf >> 16) * src_stride;
   686     dst += clip_y * dst_stride;
   687   }
   689   // Special case for integer step values.
   690   if (((dx | dy) & 0xffff) == 0) {
   691     if (!dx || !dy) {  // 1 pixel wide and/or tall.
   692       filtering = kFilterNone;
   693     } else {
   694       // Optimized even scale down. ie 2, 4, 6, 8, 10x.
   695       if (!(dx & 0x10000) && !(dy & 0x10000)) {
   696         if (dx == 0x20000) {
   697           // Optimized 1/2 downsample.
   698           ScaleARGBDown2(src_width, src_height,
   699                          clip_width, clip_height,
   700                          src_stride, dst_stride, src, dst,
   701                          x, dx, y, dy, filtering);
   702           return;
   703         }
   704         if (dx == 0x40000 && filtering == kFilterBox) {
   705           // Optimized 1/4 box downsample.
   706           ScaleARGBDown4Box(src_width, src_height,
   707                             clip_width, clip_height,
   708                             src_stride, dst_stride, src, dst,
   709                             x, dx, y, dy);
   710           return;
   711         }
   712         ScaleARGBDownEven(src_width, src_height,
   713                           clip_width, clip_height,
   714                           src_stride, dst_stride, src, dst,
   715                           x, dx, y, dy, filtering);
   716         return;
   717       }
   718       // Optimized odd scale down. ie 3, 5, 7, 9x.
             // Filtering is dropped here; unless the scale is exactly 1:1 the
             // work falls through to the general paths below.
   719       if ((dx & 0x10000) && (dy & 0x10000)) {
   720         filtering = kFilterNone;
   721         if (dx == 0x10000 && dy == 0x10000) {
   722           // Straight copy.
   723           ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
   724                    dst, dst_stride, clip_width, clip_height);
   725           return;
   726         }
   727       }
   728     }
   729   }
   730   if (dx == 0x10000 && (x & 0xffff) == 0) {
   731     // Arbitrary scale vertically, but unscaled horizontally.
   732     ScalePlaneVertical(src_height,
   733                        clip_width, clip_height,
   734                        src_stride, dst_stride, src, dst,
   735                        x, y, dy, 4, filtering);
   736     return;
   737   }
         // Filtered scaling: a vertical step below 1.0 takes the up-scaling
         // path, anything else the down-scaling path.
   738   if (filtering && dy < 65536) {
   739     ScaleARGBBilinearUp(src_width, src_height,
   740                         clip_width, clip_height,
   741                         src_stride, dst_stride, src, dst,
   742                         x, dx, y, dy, filtering);
   743     return;
   744   }
   745   if (filtering) {
   746     ScaleARGBBilinearDown(src_width, src_height,
   747                           clip_width, clip_height,
   748                           src_stride, dst_stride, src, dst,
   749                           x, dx, y, dy, filtering);
   750     return;
   751   }
         // No filtering: point sample.
   752   ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
   753                   src_stride, dst_stride, src, dst,
   754                   x, dx, y, dy);
   755 }
   757 LIBYUV_API
   758 int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
   759                   int src_width, int src_height,
   760                   uint8* dst_argb, int dst_stride_argb,
   761                   int dst_width, int dst_height,
   762                   int clip_x, int clip_y, int clip_width, int clip_height,
   763                   enum FilterMode filtering) {
   764   if (!src_argb || src_width == 0 || src_height == 0 ||
   765       !dst_argb || dst_width <= 0 || dst_height <= 0 ||
   766       clip_x < 0 || clip_y < 0 ||
   767       (clip_x + clip_width) > dst_width ||
   768       (clip_y + clip_height) > dst_height) {
   769     return -1;
   770   }
   771   ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
   772             dst_argb, dst_stride_argb, dst_width, dst_height,
   773             clip_x, clip_y, clip_width, clip_height, filtering);
   774   return 0;
   775 }
   777 // Scale an ARGB image.
   778 LIBYUV_API
   779 int ARGBScale(const uint8* src_argb, int src_stride_argb,
   780               int src_width, int src_height,
   781               uint8* dst_argb, int dst_stride_argb,
   782               int dst_width, int dst_height,
   783               enum FilterMode filtering) {
   784   if (!src_argb || src_width == 0 || src_height == 0 ||
   785       !dst_argb || dst_width <= 0 || dst_height <= 0) {
   786     return -1;
   787   }
   788   ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
   789             dst_argb, dst_stride_argb, dst_width, dst_height,
   790             0, 0, dst_width, dst_height, filtering);
   791   return 0;
   792 }
   794 #ifdef __cplusplus
   795 }  // extern "C"
   796 }  // namespace libyuv
   797 #endif

mercurial