media/libyuv/source/scale.cc

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /*
     2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
     3  *
     4  *  Use of this source code is governed by a BSD-style license
     5  *  that can be found in the LICENSE file in the root of the source
     6  *  tree. An additional intellectual property rights grant can be found
     7  *  in the file PATENTS. All contributing project authors may
     8  *  be found in the AUTHORS file in the root of the source tree.
     9  */
    11 #include "libyuv/scale.h"
    13 #include <assert.h>
    14 #include <string.h>
    16 #include "libyuv/cpu_id.h"
    17 #include "libyuv/planar_functions.h"  // For CopyPlane
    18 #include "libyuv/row.h"
    19 #include "libyuv/scale_row.h"
    21 #ifdef __cplusplus
    22 namespace libyuv {
    23 extern "C" {
    24 #endif
    26 // Remove this macro if OVERREAD is safe.
    27 #define AVOID_OVERREAD 1
    29 static __inline int Abs(int v) {
    30   return v >= 0 ? v : -v;
    31 }
    33 #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
    35 // Scale plane, 1/2
    36 // This is an optimized version for scaling down a plane to 1/2 of
    37 // its original size.
    39 static void ScalePlaneDown2(int src_width, int src_height,
    40                             int dst_width, int dst_height,
    41                             int src_stride, int dst_stride,
    42                             const uint8* src_ptr, uint8* dst_ptr,
    43                             enum FilterMode filtering) {
    44   void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
    45                         uint8* dst_ptr, int dst_width) =
    46     filtering == kFilterNone ? ScaleRowDown2_C :
    47         (filtering == kFilterLinear ? ScaleRowDown2Linear_C :
    48         ScaleRowDown2Box_C);
    49   int row_stride = src_stride << 1;
    50   if (!filtering) {
    51     src_ptr += src_stride;  // Point to odd rows.
    52     src_stride = 0;
    53   }
    55 #if defined(HAS_SCALEROWDOWN2_NEON)
    56   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
    57     ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON;
    58   }
    59 #elif defined(HAS_SCALEROWDOWN2_SSE2)
    60   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
    61     ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Unaligned_SSE2 :
    62         (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_SSE2 :
    63         ScaleRowDown2Box_Unaligned_SSE2);
    64     if (IS_ALIGNED(src_ptr, 16) &&
    65         IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
    66         IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
    67       ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
    68           (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
    69           ScaleRowDown2Box_SSE2);
    70     }
    71   }
    72 #elif defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
    73   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
    74       IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
    75       IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    76     ScaleRowDown2 = filtering ?
    77         ScaleRowDown2Box_MIPS_DSPR2 : ScaleRowDown2_MIPS_DSPR2;
    78   }
    79 #endif
    81   if (filtering == kFilterLinear) {
    82     src_stride = 0;
    83   }
    84   // TODO(fbarchard): Loop through source height to allow odd height.
    85   int y;
    86   for (y = 0; y < dst_height; ++y) {
    87     ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
    88     src_ptr += row_stride;
    89     dst_ptr += dst_stride;
    90   }
    91 }
    93 // Scale plane, 1/4
    94 // This is an optimized version for scaling down a plane to 1/4 of
    95 // its original size.
    97 static void ScalePlaneDown4(int src_width, int src_height,
    98                             int dst_width, int dst_height,
    99                             int src_stride, int dst_stride,
   100                             const uint8* src_ptr, uint8* dst_ptr,
   101                             enum FilterMode filtering) {
   102   void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
   103                         uint8* dst_ptr, int dst_width) =
   104       filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
   105   int row_stride = src_stride << 2;
   106   if (!filtering) {
   107     src_ptr += src_stride * 2;  // Point to row 2.
   108     src_stride = 0;
   109   }
   110 #if defined(HAS_SCALEROWDOWN4_NEON)
   111   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
   112     ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
   113   }
   114 #elif defined(HAS_SCALEROWDOWN4_SSE2)
   115   if (TestCpuFlag(kCpuHasSSE2) &&
   116       IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
   117       IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
   118     ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
   119   }
   120 #elif defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
   121   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
   122       IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
   123       IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
   124     ScaleRowDown4 = filtering ?
   125         ScaleRowDown4Box_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2;
   126   }
   127 #endif
   129   if (filtering == kFilterLinear) {
   130     src_stride = 0;
   131   }
   132   int y;
   133   for (y = 0; y < dst_height; ++y) {
   134     ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
   135     src_ptr += row_stride;
   136     dst_ptr += dst_stride;
   137   }
   138 }
   140 // Scale plane down, 3/4
   142 static void ScalePlaneDown34(int src_width, int src_height,
   143                              int dst_width, int dst_height,
   144                              int src_stride, int dst_stride,
   145                              const uint8* src_ptr, uint8* dst_ptr,
   146                              enum FilterMode filtering) {
   147   assert(dst_width % 3 == 0);
   148   void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride,
   149                            uint8* dst_ptr, int dst_width);
   150   void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride,
   151                            uint8* dst_ptr, int dst_width);
   152   if (!filtering) {
   153     ScaleRowDown34_0 = ScaleRowDown34_C;
   154     ScaleRowDown34_1 = ScaleRowDown34_C;
   155   } else {
   156     ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
   157     ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
   158   }
   159 #if defined(HAS_SCALEROWDOWN34_NEON)
   160   if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
   161     if (!filtering) {
   162       ScaleRowDown34_0 = ScaleRowDown34_NEON;
   163       ScaleRowDown34_1 = ScaleRowDown34_NEON;
   164     } else {
   165       ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
   166       ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
   167     }
   168   }
   169 #endif
   170 #if defined(HAS_SCALEROWDOWN34_SSSE3)
   171   if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
   172       IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
   173     if (!filtering) {
   174       ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
   175       ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
   176     } else {
   177       ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
   178       ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
   179     }
   180   }
   181 #endif
   182 #if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
   183   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
   184       IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
   185       IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
   186     if (!filtering) {
   187       ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2;
   188       ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2;
   189     } else {
   190       ScaleRowDown34_0 = ScaleRowDown34_0_Box_MIPS_DSPR2;
   191       ScaleRowDown34_1 = ScaleRowDown34_1_Box_MIPS_DSPR2;
   192     }
   193   }
   194 #endif
   196   const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
   197   int y;
   198   for (y = 0; y < dst_height - 2; y += 3) {
   199     ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
   200     src_ptr += src_stride;
   201     dst_ptr += dst_stride;
   202     ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
   203     src_ptr += src_stride;
   204     dst_ptr += dst_stride;
   205     ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
   206                      dst_ptr, dst_width);
   207     src_ptr += src_stride * 2;
   208     dst_ptr += dst_stride;
   209   }
   211   // Remainder 1 or 2 rows with last row vertically unfiltered
   212   if ((dst_height % 3) == 2) {
   213     ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
   214     src_ptr += src_stride;
   215     dst_ptr += dst_stride;
   216     ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
   217   } else if ((dst_height % 3) == 1) {
   218     ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
   219   }
   220 }
   223 // Scale plane, 3/8
   224 // This is an optimized version for scaling down a plane to 3/8
   225 // of its original size.
   226 //
   227 // Uses box filter arranges like this
   228 // aaabbbcc -> abc
   229 // aaabbbcc    def
   230 // aaabbbcc    ghi
   231 // dddeeeff
   232 // dddeeeff
   233 // dddeeeff
   234 // ggghhhii
   235 // ggghhhii
   236 // Boxes are 3x3, 2x3, 3x2 and 2x2
   238 static void ScalePlaneDown38(int src_width, int src_height,
   239                              int dst_width, int dst_height,
   240                              int src_stride, int dst_stride,
   241                              const uint8* src_ptr, uint8* dst_ptr,
   242                              enum FilterMode filtering) {
   243   assert(dst_width % 3 == 0);
   244   void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride,
   245                            uint8* dst_ptr, int dst_width);
   246   void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride,
   247                            uint8* dst_ptr, int dst_width);
   248   if (!filtering) {
   249     ScaleRowDown38_3 = ScaleRowDown38_C;
   250     ScaleRowDown38_2 = ScaleRowDown38_C;
   251   } else {
   252     ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
   253     ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
   254   }
   255 #if defined(HAS_SCALEROWDOWN38_NEON)
   256   if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
   257     if (!filtering) {
   258       ScaleRowDown38_3 = ScaleRowDown38_NEON;
   259       ScaleRowDown38_2 = ScaleRowDown38_NEON;
   260     } else {
   261       ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
   262       ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
   263     }
   264   }
   265 #elif defined(HAS_SCALEROWDOWN38_SSSE3)
   266   if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
   267       IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
   268     if (!filtering) {
   269       ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
   270       ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
   271     } else {
   272       ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
   273       ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
   274     }
   275   }
   276 #elif defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
   277   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
   278       IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
   279       IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
   280     if (!filtering) {
   281       ScaleRowDown38_3 = ScaleRowDown38_MIPS_DSPR2;
   282       ScaleRowDown38_2 = ScaleRowDown38_MIPS_DSPR2;
   283     } else {
   284       ScaleRowDown38_3 = ScaleRowDown38_3_Box_MIPS_DSPR2;
   285       ScaleRowDown38_2 = ScaleRowDown38_2_Box_MIPS_DSPR2;
   286     }
   287   }
   288 #endif
   290   const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
   291   int y;
   292   for (y = 0; y < dst_height - 2; y += 3) {
   293     ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
   294     src_ptr += src_stride * 3;
   295     dst_ptr += dst_stride;
   296     ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
   297     src_ptr += src_stride * 3;
   298     dst_ptr += dst_stride;
   299     ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
   300     src_ptr += src_stride * 2;
   301     dst_ptr += dst_stride;
   302   }
   304   // Remainder 1 or 2 rows with last row vertically unfiltered
   305   if ((dst_height % 3) == 2) {
   306     ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
   307     src_ptr += src_stride * 3;
   308     dst_ptr += dst_stride;
   309     ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
   310   } else if ((dst_height % 3) == 1) {
   311     ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
   312   }
   313 }
   315 static __inline uint32 SumBox(int iboxwidth, int iboxheight,
   316                               ptrdiff_t src_stride, const uint8* src_ptr) {
   317   assert(iboxwidth > 0);
   318   assert(iboxheight > 0);
   319   uint32 sum = 0u;
   320   int y;
   321   for (y = 0; y < iboxheight; ++y) {
   322     int x;
   323     for (x = 0; x < iboxwidth; ++x) {
   324       sum += src_ptr[x];
   325     }
   326     src_ptr += src_stride;
   327   }
   328   return sum;
   329 }
   331 static void ScalePlaneBoxRow_C(int dst_width, int boxheight,
   332                                int x, int dx, ptrdiff_t src_stride,
   333                                const uint8* src_ptr, uint8* dst_ptr) {
   334   int i;
   335   for (i = 0; i < dst_width; ++i) {
   336     int ix = x >> 16;
   337     x += dx;
   338     int boxwidth = (x >> 16) - ix;
   339     *dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) /
   340         (boxwidth * boxheight);
   341   }
   342 }
   344 static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
   345   assert(iboxwidth > 0);
   346   uint32 sum = 0u;
   347   int x;
   348   for (x = 0; x < iboxwidth; ++x) {
   349     sum += src_ptr[x];
   350   }
   351   return sum;
   352 }
   354 static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
   355                             const uint16* src_ptr, uint8* dst_ptr) {
   356   int scaletbl[2];
   357   int minboxwidth = (dx >> 16);
   358   scaletbl[0] = 65536 / (minboxwidth * boxheight);
   359   scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
   360   int* scaleptr = scaletbl - minboxwidth;
   361   int i;
   362   for (i = 0; i < dst_width; ++i) {
   363     int ix = x >> 16;
   364     x += dx;
   365     int boxwidth = (x >> 16) - ix;
   366     *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
   367   }
   368 }
   370 static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
   371                             const uint16* src_ptr, uint8* dst_ptr) {
   372   int boxwidth = (dx >> 16);
   373   int scaleval = 65536 / (boxwidth * boxheight);
   374   int i;
   375   for (i = 0; i < dst_width; ++i) {
   376     *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
   377     x += boxwidth;
   378   }
   379 }
   381 // Scale plane down to any dimensions, with interpolation.
   382 // (boxfilter).
   383 //
   384 // Same method as SimpleScale, which is fixed point, outputting
   385 // one pixel of destination using fixed point (16.16) to step
   386 // through source, sampling a box of pixel with simple
   387 // averaging.
   388 static void ScalePlaneBox(int src_width, int src_height,
   389                           int dst_width, int dst_height,
   390                           int src_stride, int dst_stride,
   391                           const uint8* src_ptr, uint8* dst_ptr) {
   392   // Initial source x/y coordinate and step values as 16.16 fixed point.
   393   int x = 0;
   394   int y = 0;
   395   int dx = 0;
   396   int dy = 0;
   397   ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
   398              &x, &y, &dx, &dy);
   399   src_width = Abs(src_width);
   400   const int max_y = (src_height << 16);
   401   // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
   402   if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
   403     uint8* dst = dst_ptr;
   404     int j;
   405     for (j = 0; j < dst_height; ++j) {
   406       int iy = y >> 16;
   407       const uint8* src = src_ptr + iy * src_stride;
   408       y += dy;
   409       if (y > max_y) {
   410         y = max_y;
   411       }
   412       int boxheight = (y >> 16) - iy;
   413       ScalePlaneBoxRow_C(dst_width, boxheight,
   414                          x, dx, src_stride,
   415                          src, dst);
   416       dst += dst_stride;
   417     }
   418     return;
   419   }
   420   // Allocate a row buffer of uint16.
   421   align_buffer_64(row16, src_width * 2);
   423   void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
   424       const uint16* src_ptr, uint8* dst_ptr) =
   425       (dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C;
   426   void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,
   427       uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;
   428 #if defined(HAS_SCALEADDROWS_SSE2)
   429   if (TestCpuFlag(kCpuHasSSE2) &&
   430 #ifdef AVOID_OVERREAD
   431       IS_ALIGNED(src_width, 16) &&
   432 #endif
   433       IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
   434     ScaleAddRows = ScaleAddRows_SSE2;
   435   }
   436 #endif
   438   int j;
   439   for (j = 0; j < dst_height; ++j) {
   440     int iy = y >> 16;
   441     const uint8* src = src_ptr + iy * src_stride;
   442     y += dy;
   443     if (y > (src_height << 16)) {
   444       y = (src_height << 16);
   445     }
   446     int boxheight = (y >> 16) - iy;
   447     ScaleAddRows(src, src_stride, (uint16*)(row16),
   448                  src_width, boxheight);
   449     ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16),
   450                  dst_ptr);
   451     dst_ptr += dst_stride;
   452   }
   453   free_aligned_buffer_64(row16);
   454 }
   456 // Scale plane down with bilinear interpolation.
   457 void ScalePlaneBilinearDown(int src_width, int src_height,
   458                             int dst_width, int dst_height,
   459                             int src_stride, int dst_stride,
   460                             const uint8* src_ptr, uint8* dst_ptr,
   461                             enum FilterMode filtering) {
   462   // Initial source x/y coordinate and step values as 16.16 fixed point.
   463   int x = 0;
   464   int y = 0;
   465   int dx = 0;
   466   int dy = 0;
   467   ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
   468              &x, &y, &dx, &dy);
   469   src_width = Abs(src_width);
   471   void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
   472       ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
   473       InterpolateRow_C;
   474 #if defined(HAS_INTERPOLATEROW_SSE2)
   475   if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
   476     InterpolateRow = InterpolateRow_Any_SSE2;
   477     if (IS_ALIGNED(src_width, 16)) {
   478       InterpolateRow = InterpolateRow_Unaligned_SSE2;
   479       if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
   480         InterpolateRow = InterpolateRow_SSE2;
   481       }
   482     }
   483   }
   484 #endif
   485 #if defined(HAS_INTERPOLATEROW_SSSE3)
   486   if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
   487     InterpolateRow = InterpolateRow_Any_SSSE3;
   488     if (IS_ALIGNED(src_width, 16)) {
   489       InterpolateRow = InterpolateRow_Unaligned_SSSE3;
   490       if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
   491         InterpolateRow = InterpolateRow_SSSE3;
   492       }
   493     }
   494   }
   495 #endif
   496 #if defined(HAS_INTERPOLATEROW_AVX2)
   497   if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
   498     InterpolateRow = InterpolateRow_Any_AVX2;
   499     if (IS_ALIGNED(src_width, 32)) {
   500       InterpolateRow = InterpolateRow_AVX2;
   501     }
   502   }
   503 #endif
   504 #if defined(HAS_INTERPOLATEROW_NEON)
   505   if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
   506     InterpolateRow = InterpolateRow_Any_NEON;
   507     if (IS_ALIGNED(src_width, 16)) {
   508       InterpolateRow = InterpolateRow_NEON;
   509     }
   510   }
   511 #endif
   512 #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
   513   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
   514     InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
   515     if (IS_ALIGNED(src_width, 4)) {
   516       InterpolateRow = InterpolateRow_MIPS_DSPR2;
   517     }
   518   }
   519 #endif
   521   void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
   522       int dst_width, int x, int dx) =
   523       (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
   525 #if defined(HAS_SCALEFILTERCOLS_SSSE3)
   526   if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
   527     ScaleFilterCols = ScaleFilterCols_SSSE3;
   528   }
   529 #endif
   531   // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
   532   // Allocate a row buffer.
   533   align_buffer_64(row, src_width);
   535   const int max_y = (src_height - 1) << 16;
   536   int j;
   537   for (j = 0; j < dst_height; ++j) {
   538     if (y > max_y) {
   539       y = max_y;
   540     }
   541     int yi = y >> 16;
   542     const uint8* src = src_ptr + yi * src_stride;
   543     if (filtering == kFilterLinear) {
   544       ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
   545     } else {
   546       int yf = (y >> 8) & 255;
   547       InterpolateRow(row, src, src_stride, src_width, yf);
   548       ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
   549     }
   550     dst_ptr += dst_stride;
   551     y += dy;
   552   }
   553   free_aligned_buffer_64(row);
   554 }
   556 // Scale up down with bilinear interpolation.
   557 void ScalePlaneBilinearUp(int src_width, int src_height,
   558                           int dst_width, int dst_height,
   559                           int src_stride, int dst_stride,
   560                           const uint8* src_ptr, uint8* dst_ptr,
   561                           enum FilterMode filtering) {
   562   // Initial source x/y coordinate and step values as 16.16 fixed point.
   563   int x = 0;
   564   int y = 0;
   565   int dx = 0;
   566   int dy = 0;
   567   ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
   568              &x, &y, &dx, &dy);
   569   src_width = Abs(src_width);
   571   void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
   572       ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
   573       InterpolateRow_C;
   574 #if defined(HAS_INTERPOLATEROW_SSE2)
   575   if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
   576     InterpolateRow = InterpolateRow_Any_SSE2;
   577     if (IS_ALIGNED(dst_width, 16)) {
   578       InterpolateRow = InterpolateRow_Unaligned_SSE2;
   579       if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
   580         InterpolateRow = InterpolateRow_SSE2;
   581       }
   582     }
   583   }
   584 #endif
   585 #if defined(HAS_INTERPOLATEROW_SSSE3)
   586   if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
   587     InterpolateRow = InterpolateRow_Any_SSSE3;
   588     if (IS_ALIGNED(dst_width, 16)) {
   589       InterpolateRow = InterpolateRow_Unaligned_SSSE3;
   590       if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
   591         InterpolateRow = InterpolateRow_SSSE3;
   592       }
   593     }
   594   }
   595 #endif
   596 #if defined(HAS_INTERPOLATEROW_AVX2)
   597   if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
   598     InterpolateRow = InterpolateRow_Any_AVX2;
   599     if (IS_ALIGNED(dst_width, 32)) {
   600       InterpolateRow = InterpolateRow_AVX2;
   601     }
   602   }
   603 #endif
   604 #if defined(HAS_INTERPOLATEROW_NEON)
   605   if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
   606     InterpolateRow = InterpolateRow_Any_NEON;
   607     if (IS_ALIGNED(dst_width, 16)) {
   608       InterpolateRow = InterpolateRow_NEON;
   609     }
   610   }
   611 #endif
   612 #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
   613   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
   614     InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
   615     if (IS_ALIGNED(dst_width, 4)) {
   616       InterpolateRow = InterpolateRow_MIPS_DSPR2;
   617     }
   618   }
   619 #endif
   621   void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
   622        int dst_width, int x, int dx) =
   623        filtering ? ScaleFilterCols_C : ScaleCols_C;
   624   if (filtering && src_width >= 32768) {
   625     ScaleFilterCols = ScaleFilterCols64_C;
   626   }
   627 #if defined(HAS_SCALEFILTERCOLS_SSSE3)
   628   if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
   629     ScaleFilterCols = ScaleFilterCols_SSSE3;
   630   }
   631 #endif
   632   if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
   633     ScaleFilterCols = ScaleColsUp2_C;
   634 #if defined(HAS_SCALECOLS_SSE2)
   635     if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
   636         IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
   637         IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
   638       ScaleFilterCols = ScaleColsUp2_SSE2;
   639     }
   640 #endif
   641   }
   643   const int max_y = (src_height - 1) << 16;
   644   if (y > max_y) {
   645     y = max_y;
   646   }
   647   int yi = y >> 16;
   648   const uint8* src = src_ptr + yi * src_stride;
   650   // Allocate 2 row buffers.
   651   const int kRowSize = (dst_width + 15) & ~15;
   652   align_buffer_64(row, kRowSize * 2);
   654   uint8* rowptr = row;
   655   int rowstride = kRowSize;
   656   int lasty = yi;
   658   ScaleFilterCols(rowptr, src, dst_width, x, dx);
   659   if (src_height > 1) {
   660     src += src_stride;
   661   }
   662   ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
   663   src += src_stride;
   665   int j;
   666   for (j = 0; j < dst_height; ++j) {
   667     yi = y >> 16;
   668     if (yi != lasty) {
   669       if (y > max_y) {
   670         y = max_y;
   671         yi = y >> 16;
   672         src = src_ptr + yi * src_stride;
   673       }
   674       if (yi != lasty) {
   675         ScaleFilterCols(rowptr, src, dst_width, x, dx);
   676         rowptr += rowstride;
   677         rowstride = -rowstride;
   678         lasty = yi;
   679         src += src_stride;
   680       }
   681     }
   682     if (filtering == kFilterLinear) {
   683       InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
   684     } else {
   685       int yf = (y >> 8) & 255;
   686       InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
   687     }
   688     dst_ptr += dst_stride;
   689     y += dy;
   690   }
   691   free_aligned_buffer_64(row);
   692 }
   694 // Scale Plane to/from any dimensions, without interpolation.
   695 // Fixed point math is used for performance: The upper 16 bits
   696 // of x and dx is the integer part of the source position and
   697 // the lower 16 bits are the fixed decimal part.
   699 static void ScalePlaneSimple(int src_width, int src_height,
   700                              int dst_width, int dst_height,
   701                              int src_stride, int dst_stride,
   702                              const uint8* src_ptr, uint8* dst_ptr) {
   703   // Initial source x/y coordinate and step values as 16.16 fixed point.
   704   int x = 0;
   705   int y = 0;
   706   int dx = 0;
   707   int dy = 0;
   708   ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
   709              &x, &y, &dx, &dy);
   710   src_width = Abs(src_width);
   712   void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr,
   713       int dst_width, int x, int dx) = ScaleCols_C;
   714   if (src_width * 2 == dst_width && x < 0x8000) {
   715     ScaleCols = ScaleColsUp2_C;
   716 #if defined(HAS_SCALECOLS_SSE2)
   717     if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
   718         IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
   719         IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
   720       ScaleCols = ScaleColsUp2_SSE2;
   721     }
   722 #endif
   723   }
   725   int i;
   726   for (i = 0; i < dst_height; ++i) {
   727     ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
   728               dst_width, x, dx);
   729     dst_ptr += dst_stride;
   730     y += dy;
   731   }
   732 }
   734 // Scale a plane.
   735 // This function dispatches to a specialized scaler based on scale factor.
   737 LIBYUV_API
   738 void ScalePlane(const uint8* src, int src_stride,
   739                 int src_width, int src_height,
   740                 uint8* dst, int dst_stride,
   741                 int dst_width, int dst_height,
   742                 enum FilterMode filtering) {
   743   // Simplify filtering when possible.
   744   filtering = ScaleFilterReduce(src_width, src_height,
   745                                 dst_width, dst_height,
   746                                 filtering);
   748   // Negative height means invert the image.
   749   if (src_height < 0) {
   750     src_height = -src_height;
   751     src = src + (src_height - 1) * src_stride;
   752     src_stride = -src_stride;
   753   }
   755   // Use specialized scales to improve performance for common resolutions.
   756   // For example, all the 1/2 scalings will use ScalePlaneDown2()
   757   if (dst_width == src_width && dst_height == src_height) {
   758     // Straight copy.
   759     CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
   760     return;
   761   }
   762   if (dst_width == src_width) {
   763     int dy = FixedDiv(src_height, dst_height);
   764     // Arbitrary scale vertically, but unscaled vertically.
   765     ScalePlaneVertical(src_height,
   766                        dst_width, dst_height,
   767                        src_stride, dst_stride, src, dst,
   768                        0, 0, dy, 1, filtering);
   769     return;
   770   }
   771   if (dst_width <= Abs(src_width) && dst_height <= src_height) {
   772     // Scale down.
   773     if (4 * dst_width == 3 * src_width &&
   774         4 * dst_height == 3 * src_height) {
   775       // optimized, 3/4
   776       ScalePlaneDown34(src_width, src_height, dst_width, dst_height,
   777                        src_stride, dst_stride, src, dst, filtering);
   778       return;
   779     }
   780     if (2 * dst_width == src_width && 2 * dst_height == src_height) {
   781       // optimized, 1/2
   782       ScalePlaneDown2(src_width, src_height, dst_width, dst_height,
   783                       src_stride, dst_stride, src, dst, filtering);
   784       return;
   785     }
   786     // 3/8 rounded up for odd sized chroma height.
   787     if (8 * dst_width == 3 * src_width &&
   788         dst_height == ((src_height * 3 + 7) / 8)) {
   789       // optimized, 3/8
   790       ScalePlaneDown38(src_width, src_height, dst_width, dst_height,
   791                        src_stride, dst_stride, src, dst, filtering);
   792       return;
   793     }
   794     if (4 * dst_width == src_width && 4 * dst_height == src_height &&
   795                filtering != kFilterBilinear) {
   796       // optimized, 1/4
   797       ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
   798                       src_stride, dst_stride, src, dst, filtering);
   799       return;
   800     }
   801   }
   802   if (filtering == kFilterBox && dst_height * 2 < src_height) {
   803     ScalePlaneBox(src_width, src_height, dst_width, dst_height,
   804                   src_stride, dst_stride, src, dst);
   805     return;
   806   }
   807   if (filtering && dst_height > src_height) {
   808     ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
   809                          src_stride, dst_stride, src, dst, filtering);
   810     return;
   811   }
   812   if (filtering) {
   813     ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
   814                            src_stride, dst_stride, src, dst, filtering);
   815     return;
   816   }
   817   ScalePlaneSimple(src_width, src_height, dst_width, dst_height,
   818                    src_stride, dst_stride, src, dst);
   819 }
   821 // Scale an I420 image.
   822 // This function in turn calls a scaling function for each plane.
   824 LIBYUV_API
   825 int I420Scale(const uint8* src_y, int src_stride_y,
   826               const uint8* src_u, int src_stride_u,
   827               const uint8* src_v, int src_stride_v,
   828               int src_width, int src_height,
   829               uint8* dst_y, int dst_stride_y,
   830               uint8* dst_u, int dst_stride_u,
   831               uint8* dst_v, int dst_stride_v,
   832               int dst_width, int dst_height,
   833               enum FilterMode filtering) {
   834   if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
   835       !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
   836     return -1;
   837   }
   838   int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
   839   int src_halfheight = SUBSAMPLE(src_height, 1, 1);
   840   int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
   841   int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
   843   ScalePlane(src_y, src_stride_y, src_width, src_height,
   844              dst_y, dst_stride_y, dst_width, dst_height,
   845              filtering);
   846   ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight,
   847              dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
   848              filtering);
   849   ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight,
   850              dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
   851              filtering);
   852   return 0;
   853 }
   855 // Deprecated api
   856 LIBYUV_API
   857 int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
   858           int src_stride_y, int src_stride_u, int src_stride_v,
   859           int src_width, int src_height,
   860           uint8* dst_y, uint8* dst_u, uint8* dst_v,
   861           int dst_stride_y, int dst_stride_u, int dst_stride_v,
   862           int dst_width, int dst_height,
   863           LIBYUV_BOOL interpolate) {
   864   return I420Scale(src_y, src_stride_y,
   865                    src_u, src_stride_u,
   866                    src_v, src_stride_v,
   867                    src_width, src_height,
   868                    dst_y, dst_stride_y,
   869                    dst_u, dst_stride_u,
   870                    dst_v, dst_stride_v,
   871                    dst_width, dst_height,
   872                    interpolate ? kFilterBox : kFilterNone);
   873 }
   875 // Deprecated api
   876 LIBYUV_API
   877 int ScaleOffset(const uint8* src, int src_width, int src_height,
   878                 uint8* dst, int dst_width, int dst_height, int dst_yoffset,
   879                 LIBYUV_BOOL interpolate) {
   880   if (!src || src_width <= 0 || src_height <= 0 ||
   881       !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset < 0 ||
   882       dst_yoffset >= dst_height) {
   883     return -1;
   884   }
   885   dst_yoffset = dst_yoffset & ~1;  // chroma requires offset to multiple of 2.
   886   int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
   887   int src_halfheight = SUBSAMPLE(src_height, 1, 1);
   888   int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
   889   int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
   890   int aheight = dst_height - dst_yoffset * 2;  // actual output height
   891   const uint8* src_y = src;
   892   const uint8* src_u = src + src_width * src_height;
   893   const uint8* src_v = src + src_width * src_height +
   894                              src_halfwidth * src_halfheight;
   895   uint8* dst_y = dst + dst_yoffset * dst_width;
   896   uint8* dst_u = dst + dst_width * dst_height +
   897                  (dst_yoffset >> 1) * dst_halfwidth;
   898   uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight +
   899                  (dst_yoffset >> 1) * dst_halfwidth;
   900   return I420Scale(src_y, src_width,
   901                    src_u, src_halfwidth,
   902                    src_v, src_halfwidth,
   903                    src_width, src_height,
   904                    dst_y, dst_width,
   905                    dst_u, dst_halfwidth,
   906                    dst_v, dst_halfwidth,
   907                    dst_width, aheight,
   908                    interpolate ? kFilterBox : kFilterNone);
   909 }
   911 #ifdef __cplusplus
   912 }  // extern "C"
   913 }  // namespace libyuv
   914 #endif

mercurial