The Tor Browser: media/libyuv/source/scale

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /*

     2  *  Copyright 2013 The LibYuv Project Authors. All rights reserved.

     3  *

     4  *  Use of this source code is governed by a BSD-style license

     5  *  that can be found in the LICENSE file in the root of the source

     6  *  tree. An additional intellectual property rights grant can be found

     7  *  in the file PATENTS. All contributing project authors may

     8  *  be found in the AUTHORS file in the root of the source tree.

     9  */

    11 #include "libyuv/scale.h"

    13 #include <assert.h>

    14 #include <string.h>

    16 #include "libyuv/cpu_id.h"

    17 #include "libyuv/planar_functions.h"  // For CopyARGB

    18 #include "libyuv/row.h"

    19 #include "libyuv/scale_row.h"

    21 #ifdef __cplusplus

    22 namespace libyuv {

    23 extern "C" {

    24 #endif

    // Returns the magnitude of v: v itself when non-negative, otherwise -v.
    static __inline int Abs(int v) {
      if (v < 0) {
        return -v;
      }
      return v;
    }

    30 // CPU agnostic row functions

    31 void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride,

    32                      uint8* dst, int dst_width) {

    33   int x;

    34   for (x = 0; x < dst_width - 1; x += 2) {

    35     dst[0] = src_ptr[1];

    36     dst[1] = src_ptr[3];

    37     dst += 2;

    38     src_ptr += 4;

    39   }

    40   if (dst_width & 1) {

    41     dst[0] = src_ptr[1];

    42   }

    43 }

    45 void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,

    46                            uint8* dst, int dst_width) {

    47   const uint8* s = src_ptr;

    48   int x;

    49   for (x = 0; x < dst_width - 1; x += 2) {

    50     dst[0] = (s[0] + s[1] + 1) >> 1;

    51     dst[1] = (s[2] + s[3] + 1) >> 1;

    52     dst += 2;

    53     s += 4;

    54   }

    55   if (dst_width & 1) {

    56     dst[0] = (s[0] + s[1] + 1) >> 1;

    57   }

    58 }

    60 void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,

    61                         uint8* dst, int dst_width) {

    62   const uint8* s = src_ptr;

    63   const uint8* t = src_ptr + src_stride;

    64   int x;

    65   for (x = 0; x < dst_width - 1; x += 2) {

    66     dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;

    67     dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;

    68     dst += 2;

    69     s += 4;

    70     t += 4;

    71   }

    72   if (dst_width & 1) {

    73     dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;

    74   }

    75 }

    77 void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,

    78                      uint8* dst, int dst_width) {

    79   int x;

    80   for (x = 0; x < dst_width - 1; x += 2) {

    81     dst[0] = src_ptr[2];

    82     dst[1] = src_ptr[6];

    83     dst += 2;

    84     src_ptr += 8;

    85   }

    86   if (dst_width & 1) {

    87     dst[0] = src_ptr[2];

    88   }

    89 }

    91 void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,

    92                         uint8* dst, int dst_width) {

    93   intptr_t stride = src_stride;

    94   int x;

    95   for (x = 0; x < dst_width - 1; x += 2) {

    96     dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +

    97              src_ptr[stride + 0] + src_ptr[stride + 1] +

    98              src_ptr[stride + 2] + src_ptr[stride + 3] +

    99              src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +

   100              src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +

   101              src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +

   102              src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +

   103              8) >> 4;

   104     dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +

   105              src_ptr[stride + 4] + src_ptr[stride + 5] +

   106              src_ptr[stride + 6] + src_ptr[stride + 7] +

   107              src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +

   108              src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +

   109              src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +

   110              src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +

   111              8) >> 4;

   112     dst += 2;

   113     src_ptr += 8;

   114   }

   115   if (dst_width & 1) {

   116     dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +

   117              src_ptr[stride + 0] + src_ptr[stride + 1] +

   118              src_ptr[stride + 2] + src_ptr[stride + 3] +

   119              src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +

   120              src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +

   121              src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +

   122              src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +

   123              8) >> 4;

   124   }

   125 }

   127 void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride,

   128                       uint8* dst, int dst_width) {

   129   int x;

   130   assert((dst_width % 3 == 0) && (dst_width > 0));

   131   for (x = 0; x < dst_width; x += 3) {

   132     dst[0] = src_ptr[0];

   133     dst[1] = src_ptr[1];

   134     dst[2] = src_ptr[3];

   135     dst += 3;

   136     src_ptr += 4;

   137   }

   138 }

   140 // Filter rows 0 and 1 together, 3 : 1

   141 void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,

   142                             uint8* d, int dst_width) {

   143   const uint8* s = src_ptr;

   144   const uint8* t = src_ptr + src_stride;

   145   int x;

   146   assert((dst_width % 3 == 0) && (dst_width > 0));

   147   for (x = 0; x < dst_width; x += 3) {

   148     uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;

   149     uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;

   150     uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;

   151     uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;

   152     uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;

   153     uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;

   154     d[0] = (a0 * 3 + b0 + 2) >> 2;

   155     d[1] = (a1 * 3 + b1 + 2) >> 2;

   156     d[2] = (a2 * 3 + b2 + 2) >> 2;

   157     d += 3;

   158     s += 4;

   159     t += 4;

   160   }

   161 }

   163 // Filter rows 1 and 2 together, 1 : 1

   164 void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,

   165                             uint8* d, int dst_width) {

   166   const uint8* s = src_ptr;

   167   const uint8* t = src_ptr + src_stride;

   168   int x;

   169   assert((dst_width % 3 == 0) && (dst_width > 0));

   170   for (x = 0; x < dst_width; x += 3) {

   171     uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;

   172     uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;

   173     uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;

   174     uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;

   175     uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;

   176     uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;

   177     d[0] = (a0 + b0 + 1) >> 1;

   178     d[1] = (a1 + b1 + 1) >> 1;

   179     d[2] = (a2 + b2 + 1) >> 1;

   180     d += 3;

   181     s += 4;

   182     t += 4;

   183   }

   184 }

   186 // Scales a single row of pixels using point sampling.

   187 void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr,

   188                  int dst_width, int x, int dx) {

   189   int j;

   190   for (j = 0; j < dst_width - 1; j += 2) {

   191     dst_ptr[0] = src_ptr[x >> 16];

   192     x += dx;

   193     dst_ptr[1] = src_ptr[x >> 16];

   194     x += dx;

   195     dst_ptr += 2;

   196   }

   197   if (dst_width & 1) {

   198     dst_ptr[0] = src_ptr[x >> 16];

   199   }

   200 }

   202 // Scales a single row of pixels up by 2x using point sampling.

   203 void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr,

   204                     int dst_width, int x, int dx) {

   205   int j;

   206   for (j = 0; j < dst_width - 1; j += 2) {

   207     dst_ptr[1] = dst_ptr[0] = src_ptr[0];

   208     src_ptr += 1;

   209     dst_ptr += 2;

   210   }

   211   if (dst_width & 1) {

   212     dst_ptr[0] = src_ptr[0];

   213   }

   214 }

   // Blends a and b with the fraction f, where f is scaled by 65536.
   // (1-f)a + fb can be replaced with a + f(b-a)
   #define BLENDER(a, b, f) \
       (uint8)((int)(a) + (((int)(f) * ((int)(b) - (int)(a))) >> 16))

   220 void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,

   221                        int dst_width, int x, int dx) {

   222   int j;

   223   for (j = 0; j < dst_width - 1; j += 2) {

   224     int xi = x >> 16;

   225     int a = src_ptr[xi];

   226     int b = src_ptr[xi + 1];

   227     dst_ptr[0] = BLENDER(a, b, x & 0xffff);

   228     x += dx;

   229     xi = x >> 16;

   230     a = src_ptr[xi];

   231     b = src_ptr[xi + 1];

   232     dst_ptr[1] = BLENDER(a, b, x & 0xffff);

   233     x += dx;

   234     dst_ptr += 2;

   235   }

   236   if (dst_width & 1) {

   237     int xi = x >> 16;

   238     int a = src_ptr[xi];

   239     int b = src_ptr[xi + 1];

   240     dst_ptr[0] = BLENDER(a, b, x & 0xffff);

   241   }

   242 }

   244 void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,

   245                          int dst_width, int x32, int dx) {

   246   int64 x = (int64)(x32);

   247   int j;

   248   for (j = 0; j < dst_width - 1; j += 2) {

   249     int64 xi = x >> 16;

   250     int a = src_ptr[xi];

   251     int b = src_ptr[xi + 1];

   252     dst_ptr[0] = BLENDER(a, b, x & 0xffff);

   253     x += dx;

   254     xi = x >> 16;

   255     a = src_ptr[xi];

   256     b = src_ptr[xi + 1];

   257     dst_ptr[1] = BLENDER(a, b, x & 0xffff);

   258     x += dx;

   259     dst_ptr += 2;

   260   }

   261   if (dst_width & 1) {

   262     int64 xi = x >> 16;

   263     int a = src_ptr[xi];

   264     int b = src_ptr[xi + 1];

   265     dst_ptr[0] = BLENDER(a, b, x & 0xffff);

   266   }

   267 }

   268 #undef BLENDER

   270 void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride,

   271                       uint8* dst, int dst_width) {

   272   int x;

   273   assert(dst_width % 3 == 0);

   274   for (x = 0; x < dst_width; x += 3) {

   275     dst[0] = src_ptr[0];

   276     dst[1] = src_ptr[3];

   277     dst[2] = src_ptr[6];

   278     dst += 3;

   279     src_ptr += 8;

   280   }

   281 }

   283 // 8x3 -> 3x1

   284 void ScaleRowDown38_3_Box_C(const uint8* src_ptr,

   285                             ptrdiff_t src_stride,

   286                             uint8* dst_ptr, int dst_width) {

   287   intptr_t stride = src_stride;

   288   int i;

   289   assert((dst_width % 3 == 0) && (dst_width > 0));

   290   for (i = 0; i < dst_width; i += 3) {

   291     dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +

   292         src_ptr[stride + 0] + src_ptr[stride + 1] +

   293         src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +

   294         src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *

   295         (65536 / 9) >> 16;

   296     dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +

   297         src_ptr[stride + 3] + src_ptr[stride + 4] +

   298         src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +

   299         src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *

   300         (65536 / 9) >> 16;

   301     dst_ptr[2] = (src_ptr[6] + src_ptr[7] +

   302         src_ptr[stride + 6] + src_ptr[stride + 7] +

   303         src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *

   304         (65536 / 6) >> 16;

   305     src_ptr += 8;

   306     dst_ptr += 3;

   307   }

   308 }

   310 // 8x2 -> 3x1

   311 void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,

   312                             uint8* dst_ptr, int dst_width) {

   313   intptr_t stride = src_stride;

   314   int i;

   315   assert((dst_width % 3 == 0) && (dst_width > 0));

   316   for (i = 0; i < dst_width; i += 3) {

   317     dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +

   318         src_ptr[stride + 0] + src_ptr[stride + 1] +

   319         src_ptr[stride + 2]) * (65536 / 6) >> 16;

   320     dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +

   321         src_ptr[stride + 3] + src_ptr[stride + 4] +

   322         src_ptr[stride + 5]) * (65536 / 6) >> 16;

   323     dst_ptr[2] = (src_ptr[6] + src_ptr[7] +

   324         src_ptr[stride + 6] + src_ptr[stride + 7]) *

   325         (65536 / 4) >> 16;

   326     src_ptr += 8;

   327     dst_ptr += 3;

   328   }

   329 }

   331 void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,

   332                     uint16* dst_ptr, int src_width, int src_height) {

   333   int x;

   334   assert(src_width > 0);

   335   assert(src_height > 0);

   336   for (x = 0; x < src_width; ++x) {

   337     const uint8* s = src_ptr + x;

   338     unsigned int sum = 0u;

   339     int y;

   340     for (y = 0; y < src_height; ++y) {

   341       sum += s[0];

   342       s += src_stride;

   343     }

   344     // TODO(fbarchard): Consider limitting height to 256 to avoid overflow.

   345     dst_ptr[x] = sum < 65535u ? sum : 65535u;

   346   }

   347 }

   349 void ScaleARGBRowDown2_C(const uint8* src_argb,

   350                          ptrdiff_t src_stride,

   351                          uint8* dst_argb, int dst_width) {

   352   const uint32* src = (const uint32*)(src_argb);

   353   uint32* dst = (uint32*)(dst_argb);

   355   int x;

   356   for (x = 0; x < dst_width - 1; x += 2) {

   357     dst[0] = src[1];

   358     dst[1] = src[3];

   359     src += 4;

   360     dst += 2;

   361   }

   362   if (dst_width & 1) {

   363     dst[0] = src[1];

   364   }

   365 }

   367 void ScaleARGBRowDown2Linear_C(const uint8* src_argb,

   368                                ptrdiff_t src_stride,

   369                                uint8* dst_argb, int dst_width) {

   370   int x;

   371   for (x = 0; x < dst_width; ++x) {

   372     dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;

   373     dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;

   374     dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;

   375     dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;

   376     src_argb += 8;

   377     dst_argb += 4;

   378   }

   379 }

   381 void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,

   382                             uint8* dst_argb, int dst_width) {

   383   int x;

   384   for (x = 0; x < dst_width; ++x) {

   385     dst_argb[0] = (src_argb[0] + src_argb[4] +

   386                   src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;

   387     dst_argb[1] = (src_argb[1] + src_argb[5] +

   388                   src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;

   389     dst_argb[2] = (src_argb[2] + src_argb[6] +

   390                   src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;

   391     dst_argb[3] = (src_argb[3] + src_argb[7] +

   392                   src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;

   393     src_argb += 8;

   394     dst_argb += 4;

   395   }

   396 }

   398 void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride,

   399                             int src_stepx,

   400                             uint8* dst_argb, int dst_width) {

   401   const uint32* src = (const uint32*)(src_argb);

   402   uint32* dst = (uint32*)(dst_argb);

   404   int x;

   405   for (x = 0; x < dst_width - 1; x += 2) {

   406     dst[0] = src[0];

   407     dst[1] = src[src_stepx];

   408     src += src_stepx * 2;

   409     dst += 2;

   410   }

   411   if (dst_width & 1) {

   412     dst[0] = src[0];

   413   }

   414 }

   416 void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,

   417                                ptrdiff_t src_stride,

   418                                int src_stepx,

   419                                uint8* dst_argb, int dst_width) {

   420   int x;

   421   for (x = 0; x < dst_width; ++x) {

   422     dst_argb[0] = (src_argb[0] + src_argb[4] +

   423                   src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;

   424     dst_argb[1] = (src_argb[1] + src_argb[5] +

   425                   src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;

   426     dst_argb[2] = (src_argb[2] + src_argb[6] +

   427                   src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;

   428     dst_argb[3] = (src_argb[3] + src_argb[7] +

   429                   src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;

   430     src_argb += src_stepx * 4;

   431     dst_argb += 4;

   432   }

   433 }

   435 // Scales a single row of pixels using point sampling.

   436 void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb,

   437                      int dst_width, int x, int dx) {

   438   const uint32* src = (const uint32*)(src_argb);

   439   uint32* dst = (uint32*)(dst_argb);

   440   int j;

   441   for (j = 0; j < dst_width - 1; j += 2) {

   442     dst[0] = src[x >> 16];

   443     x += dx;

   444     dst[1] = src[x >> 16];

   445     x += dx;

   446     dst += 2;

   447   }

   448   if (dst_width & 1) {

   449     dst[0] = src[x >> 16];

   450   }

   451 }

   453 void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb,

   454                        int dst_width, int x32, int dx) {

   455   int64 x = (int64)(x32);

   456   const uint32* src = (const uint32*)(src_argb);

   457   uint32* dst = (uint32*)(dst_argb);

   458   int j;

   459   for (j = 0; j < dst_width - 1; j += 2) {

   460     dst[0] = src[x >> 16];

   461     x += dx;

   462     dst[1] = src[x >> 16];

   463     x += dx;

   464     dst += 2;

   465   }

   466   if (dst_width & 1) {

   467     dst[0] = src[x >> 16];

   468   }

   469 }

   471 // Scales a single row of pixels up by 2x using point sampling.

   472 void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb,

   473                         int dst_width, int x, int dx) {

   474   const uint32* src = (const uint32*)(src_argb);

   475   uint32* dst = (uint32*)(dst_argb);

   476   int j;

   477   for (j = 0; j < dst_width - 1; j += 2) {

   478     dst[1] = dst[0] = src[0];

   479     src += 1;

   480     dst += 2;

   481   }

   482   if (dst_width & 1) {

   483     dst[0] = src[0];

   484   }

   485 }

   // Mimics SSSE3 blender
   // BLENDER1 blends two channel values a and b with the fraction f; the
   // callers in this file pass a 7-bit f (0..0x7f). The weight of a is
   // 0x7f ^ f and the weight of b is f; the sum is shifted right by 7.
   // BLENDERC extracts the 8-bit channel at bit offset s from a and b, blends
   // it, and shifts the result back into place.
   // BLENDER combines the four channels of a 32-bit pixel.
   // Every argument use and every expansion is parenthesized so the macros
   // stay correct for compound arguments and inside larger expressions.
   #define BLENDER1(a, b, f) (((a) * (0x7f ^ (f)) + (b) * (f)) >> 7)
   #define BLENDERC(a, b, f, s) (uint32)( \
       BLENDER1(((a) >> (s)) & 255, ((b) >> (s)) & 255, f) << (s))
   #define BLENDER(a, b, f) \
       (BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \
        BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0))

   495 void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,

   496                            int dst_width, int x, int dx) {

   497   const uint32* src = (const uint32*)(src_argb);

   498   uint32* dst = (uint32*)(dst_argb);

   499   int j;

   500   for (j = 0; j < dst_width - 1; j += 2) {

   501     int xi = x >> 16;

   502     int xf = (x >> 9) & 0x7f;

   503     uint32 a = src[xi];

   504     uint32 b = src[xi + 1];

   505     dst[0] = BLENDER(a, b, xf);

   506     x += dx;

   507     xi = x >> 16;

   508     xf = (x >> 9) & 0x7f;

   509     a = src[xi];

   510     b = src[xi + 1];

   511     dst[1] = BLENDER(a, b, xf);

   512     x += dx;

   513     dst += 2;

   514   }

   515   if (dst_width & 1) {

   516     int xi = x >> 16;

   517     int xf = (x >> 9) & 0x7f;

   518     uint32 a = src[xi];

   519     uint32 b = src[xi + 1];

   520     dst[0] = BLENDER(a, b, xf);

   521   }

   522 }

   524 void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,

   525                              int dst_width, int x32, int dx) {

   526   int64 x = (int64)(x32);

   527   const uint32* src = (const uint32*)(src_argb);

   528   uint32* dst = (uint32*)(dst_argb);

   529   int j;

   530   for (j = 0; j < dst_width - 1; j += 2) {

   531     int64 xi = x >> 16;

   532     int xf = (x >> 9) & 0x7f;

   533     uint32 a = src[xi];

   534     uint32 b = src[xi + 1];

   535     dst[0] = BLENDER(a, b, xf);

   536     x += dx;

   537     xi = x >> 16;

   538     xf = (x >> 9) & 0x7f;

   539     a = src[xi];

   540     b = src[xi + 1];

   541     dst[1] = BLENDER(a, b, xf);

   542     x += dx;

   543     dst += 2;

   544   }

   545   if (dst_width & 1) {

   546     int64 xi = x >> 16;

   547     int xf = (x >> 9) & 0x7f;

   548     uint32 a = src[xi];

   549     uint32 b = src[xi + 1];

   550     dst[0] = BLENDER(a, b, xf);

   551   }

   552 }

   553 #undef BLENDER1

   554 #undef BLENDERC

   555 #undef BLENDER

   557 // Scale plane vertically with bilinear interpolation.

   558 void ScalePlaneVertical(int src_height,

   559                         int dst_width, int dst_height,

   560                         int src_stride, int dst_stride,

   561                         const uint8* src_argb, uint8* dst_argb,

   562                         int x, int y, int dy,

   563                         int bpp, enum FilterMode filtering) {

   564   // TODO(fbarchard): Allow higher bpp.

   565   int dst_width_bytes = dst_width * bpp;

   566   void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,

   567       ptrdiff_t src_stride, int dst_width, int source_y_fraction) =

   568       InterpolateRow_C;

   569   const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;

   570   int j;

   571   assert(bpp >= 1 && bpp <= 4);

   572   assert(src_height != 0);

   573   assert(dst_width > 0);

   574   assert(dst_height > 0);

   575   src_argb += (x >> 16) * bpp;

   576 #if defined(HAS_INTERPOLATEROW_SSE2)

   577   if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {

   578     InterpolateRow = InterpolateRow_Any_SSE2;

   579     if (IS_ALIGNED(dst_width_bytes, 16)) {

   580       InterpolateRow = InterpolateRow_Unaligned_SSE2;

   581       if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&

   582           IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {

   583         InterpolateRow = InterpolateRow_SSE2;

   584       }

   585     }

   586   }

   587 #endif

   588 #if defined(HAS_INTERPOLATEROW_SSSE3)

   589   if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {

   590     InterpolateRow = InterpolateRow_Any_SSSE3;

   591     if (IS_ALIGNED(dst_width_bytes, 16)) {

   592       InterpolateRow = InterpolateRow_Unaligned_SSSE3;

   593       if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&

   594           IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {

   595         InterpolateRow = InterpolateRow_SSSE3;

   596       }

   597     }

   598   }

   599 #endif

   600 #if defined(HAS_INTERPOLATEROW_AVX2)

   601   if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) {

   602     InterpolateRow = InterpolateRow_Any_AVX2;

   603     if (IS_ALIGNED(dst_width_bytes, 32)) {

   604       InterpolateRow = InterpolateRow_AVX2;

   605     }

   606   }

   607 #endif

   608 #if defined(HAS_INTERPOLATEROW_NEON)

   609   if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) {

   610     InterpolateRow = InterpolateRow_Any_NEON;

   611     if (IS_ALIGNED(dst_width_bytes, 16)) {

   612       InterpolateRow = InterpolateRow_NEON;

   613     }

   614   }

   615 #endif

   616 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)

   617   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 &&

   618       IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&

   619       IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {

   620     InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;

   621     if (IS_ALIGNED(dst_width_bytes, 4)) {

   622       InterpolateRow = InterpolateRow_MIPS_DSPR2;

   623     }

   624   }

   625 #endif

   626   for (j = 0; j < dst_height; ++j) {

   627     int yi;

   628     int yf;

   629     if (y > max_y) {

   630       y = max_y;

   631     }

   632     yi = y >> 16;

   633     yf = filtering ? ((y >> 8) & 255) : 0;

   634     InterpolateRow(dst_argb, src_argb + yi * src_stride,

   635                    src_stride, dst_width_bytes, yf);

   636     dst_argb += dst_stride;

   637     y += dy;

   638   }

   639 }

   641 // Simplify the filtering based on scale factors.

   642 enum FilterMode ScaleFilterReduce(int src_width, int src_height,

   643                                   int dst_width, int dst_height,

   644                                   enum FilterMode filtering) {

   645   if (src_width < 0) {

   646     src_width = -src_width;

   647   }

   648   if (src_height < 0) {

   649     src_height = -src_height;

   650   }

   651   if (filtering == kFilterBox) {

   652     // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.

   653     if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {

   654       filtering = kFilterBilinear;

   655     }

   656     // If scaling to larger, switch from Box to Bilinear.

   657     if (dst_width >= src_width || dst_height >= src_height) {

   658       filtering = kFilterBilinear;

   659     }

   660   }

   661   if (filtering == kFilterBilinear) {

   662     if (src_height == 1) {

   663       filtering = kFilterLinear;

   664     }

   665     // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.

   666     if (dst_height == src_height || dst_height * 3 == src_height) {

   667       filtering = kFilterLinear;

   668     }

   669     // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to

   670     // avoid reading 2 pixels horizontally that causes memory exception.

   671     if (src_width == 1) {

   672       filtering = kFilterNone;

   673     }

   674   }

   675   if (filtering == kFilterLinear) {

   676     if (src_width == 1) {

   677       filtering = kFilterNone;

   678     }

   679     // TODO(fbarchard): Detect any odd scale factor and reduce to None.

   680     if (dst_width == src_width || dst_width * 3 == src_width) {

   681       filtering = kFilterNone;

   682     }

   683   }

   684   return filtering;

   685 }

   687 // Divide num by div and return as 16.16 fixed point result.

   688 int FixedDiv_C(int num, int div) {

   689   return (int)(((int64)(num) << 16) / div);

   690 }

   692 // Divide num by div and return as 16.16 fixed point result.

   693 int FixedDiv1_C(int num, int div) {

   694   return (int)((((int64)(num) << 16) - 0x00010001) /

   695                           (div - 1));

   696 }

   // Start offset for a step of dx: half the step plus s, mirrored in sign
   // when dx is negative. Arguments and the whole expansion are parenthesized
   // so the macro is safe with compound arguments and inside larger
   // expressions.
   #define CENTERSTART(dx, s) \
       (((dx) < 0) ? -((-(dx) >> 1) + (s)) : (((dx) >> 1) + (s)))

   700 // Compute slope values for stepping.

   701 void ScaleSlope(int src_width, int src_height,

   702                 int dst_width, int dst_height,

   703                 enum FilterMode filtering,

   704                 int* x, int* y, int* dx, int* dy) {

   705   assert(x != NULL);

   706   assert(y != NULL);

   707   assert(dx != NULL);

   708   assert(dy != NULL);

   709   assert(src_width != 0);

   710   assert(src_height != 0);

   711   assert(dst_width > 0);

   712   assert(dst_height > 0);

   713   // Check for 1 pixel and avoid FixedDiv overflow.

   714   if (dst_width == 1 && src_width >= 32768) {

   715     dst_width = src_width;

   716   }

   717   if (dst_height == 1 && src_height >= 32768) {

   718     dst_height = src_height;

   719   }

   720   if (filtering == kFilterBox) {

   721     // Scale step for point sampling duplicates all pixels equally.

   722     *dx = FixedDiv(Abs(src_width), dst_width);

   723     *dy = FixedDiv(src_height, dst_height);

   724     *x = 0;

   725     *y = 0;

   726   } else if (filtering == kFilterBilinear) {

   727     // Scale step for bilinear sampling renders last pixel once for upsample.

   728     if (dst_width <= Abs(src_width)) {

   729       *dx = FixedDiv(Abs(src_width), dst_width);

   730       *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.

   731     } else if (dst_width > 1) {

   732       *dx = FixedDiv1(Abs(src_width), dst_width);

   733       *x = 0;

   734     }

   735     if (dst_height <= src_height) {

   736       *dy = FixedDiv(src_height,  dst_height);

   737       *y = CENTERSTART(*dy, -32768);  // Subtract 0.5 (32768) to center filter.

   738     } else if (dst_height > 1) {

   739       *dy = FixedDiv1(src_height, dst_height);

   740       *y = 0;

   741     }

   742   } else if (filtering == kFilterLinear) {

   743     // Scale step for bilinear sampling renders last pixel once for upsample.

   744     if (dst_width <= Abs(src_width)) {

   745       *dx = FixedDiv(Abs(src_width), dst_width);

   746       *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.

   747     } else if (dst_width > 1) {

   748       *dx = FixedDiv1(Abs(src_width), dst_width);

   749       *x = 0;

   750     }

   751     *dy = FixedDiv(src_height, dst_height);

   752     *y = *dy >> 1;

   753   } else {

   754     // Scale step for point sampling duplicates all pixels equally.

   755     *dx = FixedDiv(Abs(src_width), dst_width);

   756     *dy = FixedDiv(src_height, dst_height);

   757     *x = CENTERSTART(*dx, 0);

   758     *y = CENTERSTART(*dy, 0);

   759   }

   760   // Negative src_width means horizontally mirror.

   761   if (src_width < 0) {

   762     *x += (dst_width - 1) * *dx;

   763     *dx = -*dx;

   764     // src_width = -src_width;   // Caller must do this.

   765   }

   766 }

   767 #undef CENTERSTART

   769 #ifdef __cplusplus

   770 }  // extern "C"

   771 }  // namespace libyuv

   772 #endif

The Tor Browser / file revision

media/libyuv/source/scale_common.cc@b8a032363ba2

media/libyuv/source/scale_common.cc