media/libyuv/source/scale_common.cc

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /*
     2  *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
     3  *
     4  *  Use of this source code is governed by a BSD-style license
     5  *  that can be found in the LICENSE file in the root of the source
     6  *  tree. An additional intellectual property rights grant can be found
     7  *  in the file PATENTS. All contributing project authors may
     8  *  be found in the AUTHORS file in the root of the source tree.
     9  */
    11 #include "libyuv/scale.h"
    13 #include <assert.h>
    14 #include <string.h>
    16 #include "libyuv/cpu_id.h"
    17 #include "libyuv/planar_functions.h"  // For CopyARGB
    18 #include "libyuv/row.h"
    19 #include "libyuv/scale_row.h"
    21 #ifdef __cplusplus
    22 namespace libyuv {
    23 extern "C" {
    24 #endif
    26 static __inline int Abs(int v) {
    27   return v >= 0 ? v : -v;
    28 }
    30 // CPU agnostic row functions
    31 void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,
    32                      uint8* dst, int dst_width) {
    33   int x;
    34   for (x = 0; x < dst_width - 1; x += 2) {
    35     dst[0] = src_ptr[1];
    36     dst[1] = src_ptr[3];
    37     dst += 2;
    38     src_ptr += 4;
    39   }
    40   if (dst_width & 1) {
    41     dst[0] = src_ptr[1];
    42   }
    43 }
    45 void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
    46                            uint8* dst, int dst_width) {
    47   const uint8* s = src_ptr;
    48   int x;
    49   for (x = 0; x < dst_width - 1; x += 2) {
    50     dst[0] = (s[0] + s[1] + 1) >> 1;
    51     dst[1] = (s[2] + s[3] + 1) >> 1;
    52     dst += 2;
    53     s += 4;
    54   }
    55   if (dst_width & 1) {
    56     dst[0] = (s[0] + s[1] + 1) >> 1;
    57   }
    58 }
    60 void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
    61                         uint8* dst, int dst_width) {
    62   const uint8* s = src_ptr;
    63   const uint8* t = src_ptr + src_stride;
    64   int x;
    65   for (x = 0; x < dst_width - 1; x += 2) {
    66     dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    67     dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
    68     dst += 2;
    69     s += 4;
    70     t += 4;
    71   }
    72   if (dst_width & 1) {
    73     dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    74   }
    75 }
    77 void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
    78                      uint8* dst, int dst_width) {
    79   int x;
    80   for (x = 0; x < dst_width - 1; x += 2) {
    81     dst[0] = src_ptr[2];
    82     dst[1] = src_ptr[6];
    83     dst += 2;
    84     src_ptr += 8;
    85   }
    86   if (dst_width & 1) {
    87     dst[0] = src_ptr[2];
    88   }
    89 }
    91 void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
    92                         uint8* dst, int dst_width) {
    93   intptr_t stride = src_stride;
    94   int x;
    95   for (x = 0; x < dst_width - 1; x += 2) {
    96     dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
    97              src_ptr[stride + 0] + src_ptr[stride + 1] +
    98              src_ptr[stride + 2] + src_ptr[stride + 3] +
    99              src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
   100              src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
   101              src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
   102              src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
   103              8) >> 4;
   104     dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
   105              src_ptr[stride + 4] + src_ptr[stride + 5] +
   106              src_ptr[stride + 6] + src_ptr[stride + 7] +
   107              src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
   108              src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
   109              src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
   110              src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
   111              8) >> 4;
   112     dst += 2;
   113     src_ptr += 8;
   114   }
   115   if (dst_width & 1) {
   116     dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
   117              src_ptr[stride + 0] + src_ptr[stride + 1] +
   118              src_ptr[stride + 2] + src_ptr[stride + 3] +
   119              src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
   120              src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
   121              src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
   122              src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
   123              8) >> 4;
   124   }
   125 }
   127 void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,
   128                       uint8* dst, int dst_width) {
   129   int x;
   130   assert((dst_width % 3 == 0) && (dst_width > 0));
   131   for (x = 0; x < dst_width; x += 3) {
   132     dst[0] = src_ptr[0];
   133     dst[1] = src_ptr[1];
   134     dst[2] = src_ptr[3];
   135     dst += 3;
   136     src_ptr += 4;
   137   }
   138 }
   140 // Filter rows 0 and 1 together, 3 : 1
   141 void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
   142                             uint8* d, int dst_width) {
   143   const uint8* s = src_ptr;
   144   const uint8* t = src_ptr + src_stride;
   145   int x;
   146   assert((dst_width % 3 == 0) && (dst_width > 0));
   147   for (x = 0; x < dst_width; x += 3) {
   148     uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
   149     uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
   150     uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
   151     uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
   152     uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
   153     uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
   154     d[0] = (a0 * 3 + b0 + 2) >> 2;
   155     d[1] = (a1 * 3 + b1 + 2) >> 2;
   156     d[2] = (a2 * 3 + b2 + 2) >> 2;
   157     d += 3;
   158     s += 4;
   159     t += 4;
   160   }
   161 }
   163 // Filter rows 1 and 2 together, 1 : 1
   164 void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
   165                             uint8* d, int dst_width) {
   166   const uint8* s = src_ptr;
   167   const uint8* t = src_ptr + src_stride;
   168   int x;
   169   assert((dst_width % 3 == 0) && (dst_width > 0));
   170   for (x = 0; x < dst_width; x += 3) {
   171     uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
   172     uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
   173     uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
   174     uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
   175     uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
   176     uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
   177     d[0] = (a0 + b0 + 1) >> 1;
   178     d[1] = (a1 + b1 + 1) >> 1;
   179     d[2] = (a2 + b2 + 1) >> 1;
   180     d += 3;
   181     s += 4;
   182     t += 4;
   183   }
   184 }
   186 // Scales a single row of pixels using point sampling.
   187 void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,
   188                  int dst_width, int x, int dx) {
   189   int j;
   190   for (j = 0; j < dst_width - 1; j += 2) {
   191     dst_ptr[0] = src_ptr[x >> 16];
   192     x += dx;
   193     dst_ptr[1] = src_ptr[x >> 16];
   194     x += dx;
   195     dst_ptr += 2;
   196   }
   197   if (dst_width & 1) {
   198     dst_ptr[0] = src_ptr[x >> 16];
   199   }
   200 }
   202 // Scales a single row of pixels up by 2x using point sampling.
   203 void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,
   204                     int dst_width, int x, int dx) {
   205   int j;
   206   for (j = 0; j < dst_width - 1; j += 2) {
   207     dst_ptr[1] = dst_ptr[0] = src_ptr[0];
   208     src_ptr += 1;
   209     dst_ptr += 2;
   210   }
   211   if (dst_width & 1) {
   212     dst_ptr[0] = src_ptr[0];
   213   }
   214 }
   216 // (1-f)a + fb can be replaced with a + f(b-a)
   217 #define BLENDER(a, b, f) (uint8)((int)(a) + \
   218     ((int)(f) * ((int)(b) - (int)(a)) >> 16))
   220 void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
   221                        int dst_width, int x, int dx) {
   222   int j;
   223   for (j = 0; j < dst_width - 1; j += 2) {
   224     int xi = x >> 16;
   225     int a = src_ptr[xi];
   226     int b = src_ptr[xi + 1];
   227     dst_ptr[0] = BLENDER(a, b, x & 0xffff);
   228     x += dx;
   229     xi = x >> 16;
   230     a = src_ptr[xi];
   231     b = src_ptr[xi + 1];
   232     dst_ptr[1] = BLENDER(a, b, x & 0xffff);
   233     x += dx;
   234     dst_ptr += 2;
   235   }
   236   if (dst_width & 1) {
   237     int xi = x >> 16;
   238     int a = src_ptr[xi];
   239     int b = src_ptr[xi + 1];
   240     dst_ptr[0] = BLENDER(a, b, x & 0xffff);
   241   }
   242 }
   244 void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
   245                          int dst_width, int x32, int dx) {
   246   int64 x = (int64)(x32);
   247   int j;
   248   for (j = 0; j < dst_width - 1; j += 2) {
   249     int64 xi = x >> 16;
   250     int a = src_ptr[xi];
   251     int b = src_ptr[xi + 1];
   252     dst_ptr[0] = BLENDER(a, b, x & 0xffff);
   253     x += dx;
   254     xi = x >> 16;
   255     a = src_ptr[xi];
   256     b = src_ptr[xi + 1];
   257     dst_ptr[1] = BLENDER(a, b, x & 0xffff);
   258     x += dx;
   259     dst_ptr += 2;
   260   }
   261   if (dst_width & 1) {
   262     int64 xi = x >> 16;
   263     int a = src_ptr[xi];
   264     int b = src_ptr[xi + 1];
   265     dst_ptr[0] = BLENDER(a, b, x & 0xffff);
   266   }
   267 }
   268 #undef BLENDER
   270 void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,
   271                       uint8* dst, int dst_width) {
   272   int x;
   273   assert(dst_width % 3 == 0);
   274   for (x = 0; x < dst_width; x += 3) {
   275     dst[0] = src_ptr[0];
   276     dst[1] = src_ptr[3];
   277     dst[2] = src_ptr[6];
   278     dst += 3;
   279     src_ptr += 8;
   280   }
   281 }
   283 // 8x3 -> 3x1
   284 void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
   285                             ptrdiff_t src_stride,
   286                             uint8* dst_ptr, int dst_width) {
   287   intptr_t stride = src_stride;
   288   int i;
   289   assert((dst_width % 3 == 0) && (dst_width > 0));
   290   for (i = 0; i < dst_width; i += 3) {
   291     dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
   292         src_ptr[stride + 0] + src_ptr[stride + 1] +
   293         src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
   294         src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
   295         (65536 / 9) >> 16;
   296     dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
   297         src_ptr[stride + 3] + src_ptr[stride + 4] +
   298         src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
   299         src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
   300         (65536 / 9) >> 16;
   301     dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
   302         src_ptr[stride + 6] + src_ptr[stride + 7] +
   303         src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
   304         (65536 / 6) >> 16;
   305     src_ptr += 8;
   306     dst_ptr += 3;
   307   }
   308 }
   310 // 8x2 -> 3x1
   311 void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
   312                             uint8* dst_ptr, int dst_width) {
   313   intptr_t stride = src_stride;
   314   int i;
   315   assert((dst_width % 3 == 0) && (dst_width > 0));
   316   for (i = 0; i < dst_width; i += 3) {
   317     dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
   318         src_ptr[stride + 0] + src_ptr[stride + 1] +
   319         src_ptr[stride + 2]) * (65536 / 6) >> 16;
   320     dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
   321         src_ptr[stride + 3] + src_ptr[stride + 4] +
   322         src_ptr[stride + 5]) * (65536 / 6) >> 16;
   323     dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
   324         src_ptr[stride + 6] + src_ptr[stride + 7]) *
   325         (65536 / 4) >> 16;
   326     src_ptr += 8;
   327     dst_ptr += 3;
   328   }
   329 }
   331 void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
   332                     uint16* dst_ptr, int src_width, int src_height) {
   333   int x;
   334   assert(src_width > 0);
   335   assert(src_height > 0);
   336   for (x = 0; x < src_width; ++x) {
   337     const uint8* s = src_ptr + x;
   338     unsigned int sum = 0u;
   339     int y;
   340     for (y = 0; y < src_height; ++y) {
   341       sum += s[0];
   342       s += src_stride;
   343     }
   344     // TODO(fbarchard): Consider limitting height to 256 to avoid overflow.
   345     dst_ptr[x] = sum < 65535u ? sum : 65535u;
   346   }
   347 }
   349 void ScaleARGBRowDown2_C(const uint8* src_argb,
   350                          ptrdiff_t src_stride,
   351                          uint8* dst_argb, int dst_width) {
   352   const uint32* src = (const uint32*)(src_argb);
   353   uint32* dst = (uint32*)(dst_argb);
   355   int x;
   356   for (x = 0; x < dst_width - 1; x += 2) {
   357     dst[0] = src[1];
   358     dst[1] = src[3];
   359     src += 4;
   360     dst += 2;
   361   }
   362   if (dst_width & 1) {
   363     dst[0] = src[1];
   364   }
   365 }
   367 void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
   368                                ptrdiff_t src_stride,
   369                                uint8* dst_argb, int dst_width) {
   370   int x;
   371   for (x = 0; x < dst_width; ++x) {
   372     dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;
   373     dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;
   374     dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;
   375     dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;
   376     src_argb += 8;
   377     dst_argb += 4;
   378   }
   379 }
   381 void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
   382                             uint8* dst_argb, int dst_width) {
   383   int x;
   384   for (x = 0; x < dst_width; ++x) {
   385     dst_argb[0] = (src_argb[0] + src_argb[4] +
   386                   src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
   387     dst_argb[1] = (src_argb[1] + src_argb[5] +
   388                   src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
   389     dst_argb[2] = (src_argb[2] + src_argb[6] +
   390                   src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
   391     dst_argb[3] = (src_argb[3] + src_argb[7] +
   392                   src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
   393     src_argb += 8;
   394     dst_argb += 4;
   395   }
   396 }
   398 void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,
   399                             int src_stepx,
   400                             uint8* dst_argb, int dst_width) {
   401   const uint32* src = (const uint32*)(src_argb);
   402   uint32* dst = (uint32*)(dst_argb);
   404   int x;
   405   for (x = 0; x < dst_width - 1; x += 2) {
   406     dst[0] = src[0];
   407     dst[1] = src[src_stepx];
   408     src += src_stepx * 2;
   409     dst += 2;
   410   }
   411   if (dst_width & 1) {
   412     dst[0] = src[0];
   413   }
   414 }
   416 void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
   417                                ptrdiff_t src_stride,
   418                                int src_stepx,
   419                                uint8* dst_argb, int dst_width) {
   420   int x;
   421   for (x = 0; x < dst_width; ++x) {
   422     dst_argb[0] = (src_argb[0] + src_argb[4] +
   423                   src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
   424     dst_argb[1] = (src_argb[1] + src_argb[5] +
   425                   src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
   426     dst_argb[2] = (src_argb[2] + src_argb[6] +
   427                   src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
   428     dst_argb[3] = (src_argb[3] + src_argb[7] +
   429                   src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
   430     src_argb += src_stepx * 4;
   431     dst_argb += 4;
   432   }
   433 }
   435 // Scales a single row of pixels using point sampling.
   436 void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,
   437                      int dst_width, int x, int dx) {
   438   const uint32* src = (const uint32*)(src_argb);
   439   uint32* dst = (uint32*)(dst_argb);
   440   int j;
   441   for (j = 0; j < dst_width - 1; j += 2) {
   442     dst[0] = src[x >> 16];
   443     x += dx;
   444     dst[1] = src[x >> 16];
   445     x += dx;
   446     dst += 2;
   447   }
   448   if (dst_width & 1) {
   449     dst[0] = src[x >> 16];
   450   }
   451 }
   453 void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,
   454                        int dst_width, int x32, int dx) {
   455   int64 x = (int64)(x32);
   456   const uint32* src = (const uint32*)(src_argb);
   457   uint32* dst = (uint32*)(dst_argb);
   458   int j;
   459   for (j = 0; j < dst_width - 1; j += 2) {
   460     dst[0] = src[x >> 16];
   461     x += dx;
   462     dst[1] = src[x >> 16];
   463     x += dx;
   464     dst += 2;
   465   }
   466   if (dst_width & 1) {
   467     dst[0] = src[x >> 16];
   468   }
   469 }
   471 // Scales a single row of pixels up by 2x using point sampling.
   472 void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,
   473                         int dst_width, int x, int dx) {
   474   const uint32* src = (const uint32*)(src_argb);
   475   uint32* dst = (uint32*)(dst_argb);
   476   int j;
   477   for (j = 0; j < dst_width - 1; j += 2) {
   478     dst[1] = dst[0] = src[0];
   479     src += 1;
   480     dst += 2;
   481   }
   482   if (dst_width & 1) {
   483     dst[0] = src[0];
   484   }
   485 }
   487 // Mimics SSSE3 blender
   488 #define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
   489 #define BLENDERC(a, b, f, s) (uint32)( \
   490     BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
   491 #define BLENDER(a, b, f) \
   492     BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \
   493     BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0)
   495 void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
   496                            int dst_width, int x, int dx) {
   497   const uint32* src = (const uint32*)(src_argb);
   498   uint32* dst = (uint32*)(dst_argb);
   499   int j;
   500   for (j = 0; j < dst_width - 1; j += 2) {
   501     int xi = x >> 16;
   502     int xf = (x >> 9) & 0x7f;
   503     uint32 a = src[xi];
   504     uint32 b = src[xi + 1];
   505     dst[0] = BLENDER(a, b, xf);
   506     x += dx;
   507     xi = x >> 16;
   508     xf = (x >> 9) & 0x7f;
   509     a = src[xi];
   510     b = src[xi + 1];
   511     dst[1] = BLENDER(a, b, xf);
   512     x += dx;
   513     dst += 2;
   514   }
   515   if (dst_width & 1) {
   516     int xi = x >> 16;
   517     int xf = (x >> 9) & 0x7f;
   518     uint32 a = src[xi];
   519     uint32 b = src[xi + 1];
   520     dst[0] = BLENDER(a, b, xf);
   521   }
   522 }
   524 void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
   525                              int dst_width, int x32, int dx) {
   526   int64 x = (int64)(x32);
   527   const uint32* src = (const uint32*)(src_argb);
   528   uint32* dst = (uint32*)(dst_argb);
   529   int j;
   530   for (j = 0; j < dst_width - 1; j += 2) {
   531     int64 xi = x >> 16;
   532     int xf = (x >> 9) & 0x7f;
   533     uint32 a = src[xi];
   534     uint32 b = src[xi + 1];
   535     dst[0] = BLENDER(a, b, xf);
   536     x += dx;
   537     xi = x >> 16;
   538     xf = (x >> 9) & 0x7f;
   539     a = src[xi];
   540     b = src[xi + 1];
   541     dst[1] = BLENDER(a, b, xf);
   542     x += dx;
   543     dst += 2;
   544   }
   545   if (dst_width & 1) {
   546     int64 xi = x >> 16;
   547     int xf = (x >> 9) & 0x7f;
   548     uint32 a = src[xi];
   549     uint32 b = src[xi + 1];
   550     dst[0] = BLENDER(a, b, xf);
   551   }
   552 }
   553 #undef BLENDER1
   554 #undef BLENDERC
   555 #undef BLENDER
   557 // Scale plane vertically with bilinear interpolation.
   558 void ScalePlaneVertical(int src_height,
   559                         int dst_width, int dst_height,
   560                         int src_stride, int dst_stride,
   561                         const uint8* src_argb, uint8* dst_argb,
   562                         int x, int y, int dy,
   563                         int bpp, enum FilterMode filtering) {
   564   // TODO(fbarchard): Allow higher bpp.
   565   int dst_width_bytes = dst_width * bpp;
   566   void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
   567       ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
   568       InterpolateRow_C;
   569   const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
   570   int j;
   571   assert(bpp >= 1 && bpp <= 4);
   572   assert(src_height != 0);
   573   assert(dst_width > 0);
   574   assert(dst_height > 0);
   575   src_argb += (x >> 16) * bpp;
   576 #if defined(HAS_INTERPOLATEROW_SSE2)
   577   if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
   578     InterpolateRow = InterpolateRow_Any_SSE2;
   579     if (IS_ALIGNED(dst_width_bytes, 16)) {
   580       InterpolateRow = InterpolateRow_Unaligned_SSE2;
   581       if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
   582           IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
   583         InterpolateRow = InterpolateRow_SSE2;
   584       }
   585     }
   586   }
   587 #endif
   588 #if defined(HAS_INTERPOLATEROW_SSSE3)
   589   if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
   590     InterpolateRow = InterpolateRow_Any_SSSE3;
   591     if (IS_ALIGNED(dst_width_bytes, 16)) {
   592       InterpolateRow = InterpolateRow_Unaligned_SSSE3;
   593       if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
   594           IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
   595         InterpolateRow = InterpolateRow_SSSE3;
   596       }
   597     }
   598   }
   599 #endif
   600 #if defined(HAS_INTERPOLATEROW_AVX2)
   601   if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) {
   602     InterpolateRow = InterpolateRow_Any_AVX2;
   603     if (IS_ALIGNED(dst_width_bytes, 32)) {
   604       InterpolateRow = InterpolateRow_AVX2;
   605     }
   606   }
   607 #endif
   608 #if defined(HAS_INTERPOLATEROW_NEON)
   609   if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) {
   610     InterpolateRow = InterpolateRow_Any_NEON;
   611     if (IS_ALIGNED(dst_width_bytes, 16)) {
   612       InterpolateRow = InterpolateRow_NEON;
   613     }
   614   }
   615 #endif
   616 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
   617   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 &&
   618       IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
   619       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
   620     InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
   621     if (IS_ALIGNED(dst_width_bytes, 4)) {
   622       InterpolateRow = InterpolateRow_MIPS_DSPR2;
   623     }
   624   }
   625 #endif
   626   for (j = 0; j < dst_height; ++j) {
   627     int yi;
   628     int yf;
   629     if (y > max_y) {
   630       y = max_y;
   631     }
   632     yi = y >> 16;
   633     yf = filtering ? ((y >> 8) & 255) : 0;
   634     InterpolateRow(dst_argb, src_argb + yi * src_stride,
   635                    src_stride, dst_width_bytes, yf);
   636     dst_argb += dst_stride;
   637     y += dy;
   638   }
   639 }
   641 // Simplify the filtering based on scale factors.
   642 enum FilterMode ScaleFilterReduce(int src_width, int src_height,
   643                                   int dst_width, int dst_height,
   644                                   enum FilterMode filtering) {
   645   if (src_width < 0) {
   646     src_width = -src_width;
   647   }
   648   if (src_height < 0) {
   649     src_height = -src_height;
   650   }
   651   if (filtering == kFilterBox) {
   652     // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
   653     if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
   654       filtering = kFilterBilinear;
   655     }
   656     // If scaling to larger, switch from Box to Bilinear.
   657     if (dst_width >= src_width || dst_height >= src_height) {
   658       filtering = kFilterBilinear;
   659     }
   660   }
   661   if (filtering == kFilterBilinear) {
   662     if (src_height == 1) {
   663       filtering = kFilterLinear;
   664     }
   665     // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
   666     if (dst_height == src_height || dst_height * 3 == src_height) {
   667       filtering = kFilterLinear;
   668     }
   669     // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
   670     // avoid reading 2 pixels horizontally that causes memory exception.
   671     if (src_width == 1) {
   672       filtering = kFilterNone;
   673     }
   674   }
   675   if (filtering == kFilterLinear) {
   676     if (src_width == 1) {
   677       filtering = kFilterNone;
   678     }
   679     // TODO(fbarchard): Detect any odd scale factor and reduce to None.
   680     if (dst_width == src_width || dst_width * 3 == src_width) {
   681       filtering = kFilterNone;
   682     }
   683   }
   684   return filtering;
   685 }
   687 // Divide num by div and return as 16.16 fixed point result.
   688 int FixedDiv_C(int num, int div) {
   689   return (int)(((int64)(num) << 16) / div);
   690 }
   692 // Divide num by div and return as 16.16 fixed point result.
   693 int FixedDiv1_C(int num, int div) {
   694   return (int)((((int64)(num) << 16) - 0x00010001) /
   695                           (div - 1));
   696 }
   698 #define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
   700 // Compute slope values for stepping.
   701 void ScaleSlope(int src_width, int src_height,
   702                 int dst_width, int dst_height,
   703                 enum FilterMode filtering,
   704                 int* x, int* y, int* dx, int* dy) {
   705   assert(x != NULL);
   706   assert(y != NULL);
   707   assert(dx != NULL);
   708   assert(dy != NULL);
   709   assert(src_width != 0);
   710   assert(src_height != 0);
   711   assert(dst_width > 0);
   712   assert(dst_height > 0);
   713   // Check for 1 pixel and avoid FixedDiv overflow.
   714   if (dst_width == 1 && src_width >= 32768) {
   715     dst_width = src_width;
   716   }
   717   if (dst_height == 1 && src_height >= 32768) {
   718     dst_height = src_height;
   719   }
   720   if (filtering == kFilterBox) {
   721     // Scale step for point sampling duplicates all pixels equally.
   722     *dx = FixedDiv(Abs(src_width), dst_width);
   723     *dy = FixedDiv(src_height, dst_height);
   724     *x = 0;
   725     *y = 0;
   726   } else if (filtering == kFilterBilinear) {
   727     // Scale step for bilinear sampling renders last pixel once for upsample.
   728     if (dst_width <= Abs(src_width)) {
   729       *dx = FixedDiv(Abs(src_width), dst_width);
   730       *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
   731     } else if (dst_width > 1) {
   732       *dx = FixedDiv1(Abs(src_width), dst_width);
   733       *x = 0;
   734     }
   735     if (dst_height <= src_height) {
   736       *dy = FixedDiv(src_height,  dst_height);
   737       *y = CENTERSTART(*dy, -32768);  // Subtract 0.5 (32768) to center filter.
   738     } else if (dst_height > 1) {
   739       *dy = FixedDiv1(src_height, dst_height);
   740       *y = 0;
   741     }
   742   } else if (filtering == kFilterLinear) {
   743     // Scale step for bilinear sampling renders last pixel once for upsample.
   744     if (dst_width <= Abs(src_width)) {
   745       *dx = FixedDiv(Abs(src_width), dst_width);
   746       *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
   747     } else if (dst_width > 1) {
   748       *dx = FixedDiv1(Abs(src_width), dst_width);
   749       *x = 0;
   750     }
   751     *dy = FixedDiv(src_height, dst_height);
   752     *y = *dy >> 1;
   753   } else {
   754     // Scale step for point sampling duplicates all pixels equally.
   755     *dx = FixedDiv(Abs(src_width), dst_width);
   756     *dy = FixedDiv(src_height, dst_height);
   757     *x = CENTERSTART(*dx, 0);
   758     *y = CENTERSTART(*dy, 0);
   759   }
   760   // Negative src_width means horizontally mirror.
   761   if (src_width < 0) {
   762     *x += (dst_width - 1) * *dx;
   763     *dx = -*dx;
   764     // src_width = -src_width;   // Caller must do this.
   765   }
   766 }
   767 #undef CENTERSTART
   769 #ifdef __cplusplus
   770 }  // extern "C"
   771 }  // namespace libyuv
   772 #endif

mercurial