The Tor Browser: gfx/ycbcr/yuv_convert.cpp@6474c204b198

Cloned from the upstream tor-browser origin at tor-browser-31.3.0esr-4.5-1-build1
(revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f) for hacking purposes.

     1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.

     2 // Use of this source code is governed by a BSD-style license that can be

     3 // found in the LICENSE file.

     5 // This webpage shows layout of YV12 and other YUV formats

     6 // http://www.fourcc.org/yuv.php

     7 // The actual conversion is best described here

     8 // http://en.wikipedia.org/wiki/YUV

     9 // An article on optimizing YUV conversion using tables instead of multiplies

    10 // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf

    11 //

    12 // YV12 is a full plane of Y and a half height, half width chroma planes

    13 // YV16 is a full plane of Y and a full height, half width chroma planes

    14 // YV24 is a full plane of Y and a full height, full width chroma planes

    15 //

    16 // ARGB pixel format is output, which on little endian is stored as BGRA.

    17 // The alpha is set to 255, allowing the application to use RGBA or RGB32.

    19 #include "yuv_convert.h"

    21 // Header for low level row functions.

    22 #include "yuv_row.h"

    23 #include "mozilla/SSE.h"

    25 namespace mozilla {

    27 namespace gfx {

    // 16.16 fixed point arithmetic: the low kFractionBits bits of a value hold
    // the fractional part.
    const int kFractionBits = 16;

    // The value 1.0 expressed in 16.16 fixed point.
    const int kFractionMax = 1 << kFractionBits;

    // Bit mask that keeps only the fractional bits of a 16.16 value.
    const int kFractionMask = kFractionMax - 1;

    34 NS_GFX_(YUVType) TypeFromSize(int ywidth,

    35                               int yheight,

    36                               int cbcrwidth,

    37                               int cbcrheight)

    38 {

    39   if (ywidth == cbcrwidth && yheight == cbcrheight) {

    40     return YV24;

    41   }

    42   else if (ywidth / 2 == cbcrwidth && yheight == cbcrheight) {

    43     return YV16;

    44   }

    45   else {

    46     return YV12;

    47   }

    48 }

    50 // Convert a frame of YUV to 32 bit ARGB.

    51 NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* y_buf,

    52                                   const uint8* u_buf,

    53                                   const uint8* v_buf,

    54                                   uint8* rgb_buf,

    55                                   int pic_x,

    56                                   int pic_y,

    57                                   int pic_width,

    58                                   int pic_height,

    59                                   int y_pitch,

    60                                   int uv_pitch,

    61                                   int rgb_pitch,

    62                                   YUVType yuv_type) {

    63   unsigned int y_shift = yuv_type == YV12 ? 1 : 0;

    64   unsigned int x_shift = yuv_type == YV24 ? 0 : 1;

    65   // Test for SSE because the optimized code uses movntq, which is not part of MMX.

    66   bool has_sse = supports_mmx() && supports_sse();

    67   // There is no optimized YV24 SSE routine so we check for this and

    68   // fall back to the C code.

    69   has_sse &= yuv_type != YV24;

    70   bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;

    71   int x_width = odd_pic_x ? pic_width - 1 : pic_width;

    73   for (int y = pic_y; y < pic_height + pic_y; ++y) {

    74     uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;

    75     const uint8* y_ptr = y_buf + y * y_pitch + pic_x;

    76     const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);

    77     const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);

    79     if (odd_pic_x) {

    80       // Handle the single odd pixel manually and use the

    81       // fast routines for the remaining.

    82       FastConvertYUVToRGB32Row_C(y_ptr++,

    83                                  u_ptr++,

    84                                  v_ptr++,

    85                                  rgb_row,

    86                                  1,

    87                                  x_shift);

    88       rgb_row += 4;

    89     }

    91     if (has_sse) {

    92       FastConvertYUVToRGB32Row(y_ptr,

    93                                u_ptr,

    94                                v_ptr,

    95                                rgb_row,

    96                                x_width);

    97     }

    98     else {

    99       FastConvertYUVToRGB32Row_C(y_ptr,

   100                                  u_ptr,

   101                                  v_ptr,

   102                                  rgb_row,

   103                                  x_width,

   104                                  x_shift);

   105     }

   106   }

   108   // MMX used for FastConvertYUVToRGB32Row requires emms instruction.

   109   if (has_sse)

   110     EMMS();

   111 }

   113 // C version does 8 at a time to mimic MMX code

   114 static void FilterRows_C(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,

   115                          int source_width, int source_y_fraction) {

   116   int y1_fraction = source_y_fraction;

   117   int y0_fraction = 256 - y1_fraction;

   118   uint8* end = ybuf + source_width;

   119   do {

   120     ybuf[0] = (y0_ptr[0] * y0_fraction + y1_ptr[0] * y1_fraction) >> 8;

   121     ybuf[1] = (y0_ptr[1] * y0_fraction + y1_ptr[1] * y1_fraction) >> 8;

   122     ybuf[2] = (y0_ptr[2] * y0_fraction + y1_ptr[2] * y1_fraction) >> 8;

   123     ybuf[3] = (y0_ptr[3] * y0_fraction + y1_ptr[3] * y1_fraction) >> 8;

   124     ybuf[4] = (y0_ptr[4] * y0_fraction + y1_ptr[4] * y1_fraction) >> 8;

   125     ybuf[5] = (y0_ptr[5] * y0_fraction + y1_ptr[5] * y1_fraction) >> 8;

   126     ybuf[6] = (y0_ptr[6] * y0_fraction + y1_ptr[6] * y1_fraction) >> 8;

   127     ybuf[7] = (y0_ptr[7] * y0_fraction + y1_ptr[7] * y1_fraction) >> 8;

   128     y0_ptr += 8;

   129     y1_ptr += 8;

   130     ybuf += 8;

   131   } while (ybuf < end);

   132 }

   134 #ifdef MOZILLA_MAY_SUPPORT_MMX

   135 void FilterRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,

   136                     int source_width, int source_y_fraction);

   137 #endif

   139 #ifdef MOZILLA_MAY_SUPPORT_SSE2

   140 void FilterRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,

   141                      int source_width, int source_y_fraction);

   142 #endif

   144 static inline void FilterRows(uint8* ybuf, const uint8* y0_ptr,

   145                               const uint8* y1_ptr, int source_width,

   146                               int source_y_fraction) {

   147 #ifdef MOZILLA_MAY_SUPPORT_SSE2

   148   if (mozilla::supports_sse2()) {

   149     FilterRows_SSE2(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);

   150     return;

   151   }

   152 #endif

   154 #ifdef MOZILLA_MAY_SUPPORT_MMX

   155   if (mozilla::supports_mmx()) {

   156     FilterRows_MMX(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);

   157     return;

   158   }

   159 #endif

   161   FilterRows_C(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);

   162 }

   165 // Scale a frame of YUV to 32 bit ARGB.

   166 NS_GFX_(void) ScaleYCbCrToRGB32(const uint8* y_buf,

   167                                 const uint8* u_buf,

   168                                 const uint8* v_buf,

   169                                 uint8* rgb_buf,

   170                                 int source_width,

   171                                 int source_height,

   172                                 int width,

   173                                 int height,

   174                                 int y_pitch,

   175                                 int uv_pitch,

   176                                 int rgb_pitch,

   177                                 YUVType yuv_type,

   178                                 Rotate view_rotate,

   179                                 ScaleFilter filter) {

   180   bool has_mmx = supports_mmx();

   182   // 4096 allows 3 buffers to fit in 12k.

   183   // Helps performance on CPU with 16K L1 cache.

   184   // Large enough for 3830x2160 and 30" displays which are 2560x1600.

   185   const int kFilterBufferSize = 4096;

   186   // Disable filtering if the screen is too big (to avoid buffer overflows).

   187   // This should never happen to regular users: they don't have monitors

   188   // wider than 4096 pixels.

   189   // TODO(fbarchard): Allow rotated videos to filter.

   190   if (source_width > kFilterBufferSize || view_rotate)

   191     filter = FILTER_NONE;

   193   unsigned int y_shift = yuv_type == YV12 ? 1 : 0;

   194   // Diagram showing origin and direction of source sampling.

   195   // ->0   4<-

   196   // 7       3

   197   //

   198   // 6       5

   199   // ->1   2<-

   200   // Rotations that start at right side of image.

   201   if ((view_rotate == ROTATE_180) ||

   202       (view_rotate == ROTATE_270) ||

   203       (view_rotate == MIRROR_ROTATE_0) ||

   204       (view_rotate == MIRROR_ROTATE_90)) {

   205     y_buf += source_width - 1;

   206     u_buf += source_width / 2 - 1;

   207     v_buf += source_width / 2 - 1;

   208     source_width = -source_width;

   209   }

   210   // Rotations that start at bottom of image.

   211   if ((view_rotate == ROTATE_90) ||

   212       (view_rotate == ROTATE_180) ||

   213       (view_rotate == MIRROR_ROTATE_90) ||

   214       (view_rotate == MIRROR_ROTATE_180)) {

   215     y_buf += (source_height - 1) * y_pitch;

   216     u_buf += ((source_height >> y_shift) - 1) * uv_pitch;

   217     v_buf += ((source_height >> y_shift) - 1) * uv_pitch;

   218     source_height = -source_height;

   219   }

   221   // Handle zero sized destination.

   222   if (width == 0 || height == 0)

   223     return;

   224   int source_dx = source_width * kFractionMax / width;

   225   int source_dy = source_height * kFractionMax / height;

   226   int source_dx_uv = source_dx;

   228   if ((view_rotate == ROTATE_90) ||

   229       (view_rotate == ROTATE_270)) {

   230     int tmp = height;

   231     height = width;

   232     width = tmp;

   233     tmp = source_height;

   234     source_height = source_width;

   235     source_width = tmp;

   236     int original_dx = source_dx;

   237     int original_dy = source_dy;

   238     source_dx = ((original_dy >> kFractionBits) * y_pitch) << kFractionBits;

   239     source_dx_uv = ((original_dy >> kFractionBits) * uv_pitch) << kFractionBits;

   240     source_dy = original_dx;

   241     if (view_rotate == ROTATE_90) {

   242       y_pitch = -1;

   243       uv_pitch = -1;

   244       source_height = -source_height;

   245     } else {

   246       y_pitch = 1;

   247       uv_pitch = 1;

   248     }

   249   }

   251   // Need padding because FilterRows() will write 1 to 16 extra pixels

   252   // after the end for SSE2 version.

   253   uint8 yuvbuf[16 + kFilterBufferSize * 3 + 16];

   254   uint8* ybuf =

   255       reinterpret_cast<uint8*>(reinterpret_cast<uintptr_t>(yuvbuf + 15) & ~15);

   256   uint8* ubuf = ybuf + kFilterBufferSize;

   257   uint8* vbuf = ubuf + kFilterBufferSize;

   258   // TODO(fbarchard): Fixed point math is off by 1 on negatives.

   259   int yscale_fixed = (source_height << kFractionBits) / height;

   261   // TODO(fbarchard): Split this into separate function for better efficiency.

   262   for (int y = 0; y < height; ++y) {

   263     uint8* dest_pixel = rgb_buf + y * rgb_pitch;

   264     int source_y_subpixel = (y * yscale_fixed);

   265     if (yscale_fixed >= (kFractionMax * 2)) {

   266       source_y_subpixel += kFractionMax / 2;  // For 1/2 or less, center filter.

   267     }

   268     int source_y = source_y_subpixel >> kFractionBits;

   270     const uint8* y0_ptr = y_buf + source_y * y_pitch;

   271     const uint8* y1_ptr = y0_ptr + y_pitch;

   273     const uint8* u0_ptr = u_buf + (source_y >> y_shift) * uv_pitch;

   274     const uint8* u1_ptr = u0_ptr + uv_pitch;

   275     const uint8* v0_ptr = v_buf + (source_y >> y_shift) * uv_pitch;

   276     const uint8* v1_ptr = v0_ptr + uv_pitch;

   278     // vertical scaler uses 16.8 fixed point

   279     int source_y_fraction = (source_y_subpixel & kFractionMask) >> 8;

   280     int source_uv_fraction =

   281         ((source_y_subpixel >> y_shift) & kFractionMask) >> 8;

   283     const uint8* y_ptr = y0_ptr;

   284     const uint8* u_ptr = u0_ptr;

   285     const uint8* v_ptr = v0_ptr;

   286     // Apply vertical filtering if necessary.

   287     // TODO(fbarchard): Remove memcpy when not necessary.

   288     if (filter & mozilla::gfx::FILTER_BILINEAR_V) {

   289       if (yscale_fixed != kFractionMax &&

   290           source_y_fraction && ((source_y + 1) < source_height)) {

   291         FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);

   292       } else {

   293         memcpy(ybuf, y0_ptr, source_width);

   294       }

   295       y_ptr = ybuf;

   296       ybuf[source_width] = ybuf[source_width-1];

   297       int uv_source_width = (source_width + 1) / 2;

   298       if (yscale_fixed != kFractionMax &&

   299           source_uv_fraction &&

   300           (((source_y >> y_shift) + 1) < (source_height >> y_shift))) {

   301         FilterRows(ubuf, u0_ptr, u1_ptr, uv_source_width, source_uv_fraction);

   302         FilterRows(vbuf, v0_ptr, v1_ptr, uv_source_width, source_uv_fraction);

   303       } else {

   304         memcpy(ubuf, u0_ptr, uv_source_width);

   305         memcpy(vbuf, v0_ptr, uv_source_width);

   306       }

   307       u_ptr = ubuf;

   308       v_ptr = vbuf;

   309       ubuf[uv_source_width] = ubuf[uv_source_width - 1];

   310       vbuf[uv_source_width] = vbuf[uv_source_width - 1];

   311     }

   312     if (source_dx == kFractionMax) {  // Not scaled

   313       FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,

   314                                dest_pixel, width);

   315     } else if (filter & FILTER_BILINEAR_H) {

   316         LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,

   317                                  dest_pixel, width, source_dx);

   318     } else {

   319 // Specialized scalers and rotation.

   320 #if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_MSC_VER) && defined(_M_IX86)

   321       if(mozilla::supports_sse()) {

   322         if (width == (source_width * 2)) {

   323           DoubleYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,

   324                                   dest_pixel, width);

   325         } else if ((source_dx & kFractionMask) == 0) {

   326           // Scaling by integer scale factor. ie half.

   327           ConvertYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,

   328                                    dest_pixel, width,

   329                                    source_dx >> kFractionBits);

   330         } else if (source_dx_uv == source_dx) {  // Not rotated.

   331           ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,

   332                              dest_pixel, width, source_dx);

   333         } else {

   334           RotateConvertYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,

   335                                          dest_pixel, width,

   336                                          source_dx >> kFractionBits,

   337                                          source_dx_uv >> kFractionBits);

   338         }

   339       }

   340       else {

   341         ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,

   342                              dest_pixel, width, source_dx);

   343       }

   344 #else

   345       (void)source_dx_uv;

   346       ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,

   347                          dest_pixel, width, source_dx);

   348 #endif

   349     }

   350   }

   351   // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms.

   352   if (has_mmx)

   353     EMMS();

   354 }

   356 }  // namespace gfx

   357 }  // namespace mozilla

The Tor Browser / file revision

gfx/ycbcr/yuv_convert.cpp@6474c204b198

gfx/ycbcr/yuv_convert.cpp