gfx/ycbcr/yuv_convert.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/ycbcr/yuv_convert.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,357 @@
     1.4 +// Copyright (c) 2010 The Chromium Authors. All rights reserved.
     1.5 +// Use of this source code is governed by a BSD-style license that can be
     1.6 +// found in the LICENSE file.
     1.7 +
     1.8 +// This webpage shows layout of YV12 and other YUV formats
     1.9 +// http://www.fourcc.org/yuv.php
    1.10 +// The actual conversion is best described here
    1.11 +// http://en.wikipedia.org/wiki/YUV
    1.12 +// An article on optimizing YUV conversion using tables instead of multiplies
    1.13 +// http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf
    1.14 +//
    1.15 +// YV12 is a full plane of Y and a half height, half width chroma planes
    1.16 +// YV16 is a full plane of Y and a full height, half width chroma planes
    1.17 +// YV24 is a full plane of Y and a full height, full width chroma planes
    1.18 +//
    1.19 +// ARGB pixel format is output, which on little endian is stored as BGRA.
    1.20 +// The alpha is set to 255, allowing the application to use RGBA or RGB32.
    1.21 +
    1.22 +#include "yuv_convert.h"
    1.23 +
    1.24 +// Header for low level row functions.
    1.25 +#include "yuv_row.h"
    1.26 +#include "mozilla/SSE.h"
    1.27 +
    1.28 +namespace mozilla {
    1.29 +
    1.30 +namespace gfx {
    1.31 + 
    1.32 +// 16.16 fixed point arithmetic
    1.33 +const int kFractionBits = 16;
    1.34 +const int kFractionMax = 1 << kFractionBits;
    1.35 +const int kFractionMask = ((1 << kFractionBits) - 1);
    1.36 +
    1.37 +NS_GFX_(YUVType) TypeFromSize(int ywidth, 
    1.38 +                              int yheight, 
    1.39 +                              int cbcrwidth, 
    1.40 +                              int cbcrheight)
    1.41 +{
    1.42 +  if (ywidth == cbcrwidth && yheight == cbcrheight) {
    1.43 +    return YV24;
    1.44 +  }
    1.45 +  else if (ywidth / 2 == cbcrwidth && yheight == cbcrheight) {
    1.46 +    return YV16;
    1.47 +  }
    1.48 +  else {
    1.49 +    return YV12;
    1.50 +  }
    1.51 +}
    1.52 +
    1.53 +// Convert a frame of YUV to 32 bit ARGB.
    1.54 +NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* y_buf,
    1.55 +                                  const uint8* u_buf,
    1.56 +                                  const uint8* v_buf,
    1.57 +                                  uint8* rgb_buf,
    1.58 +                                  int pic_x,
    1.59 +                                  int pic_y,
    1.60 +                                  int pic_width,
    1.61 +                                  int pic_height,
    1.62 +                                  int y_pitch,
    1.63 +                                  int uv_pitch,
    1.64 +                                  int rgb_pitch,
    1.65 +                                  YUVType yuv_type) {
    1.66 +  unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
    1.67 +  unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
    1.68 +  // Test for SSE because the optimized code uses movntq, which is not part of MMX.
    1.69 +  bool has_sse = supports_mmx() && supports_sse();
    1.70 +  // There is no optimized YV24 SSE routine so we check for this and
    1.71 +  // fall back to the C code.
    1.72 +  has_sse &= yuv_type != YV24;
    1.73 +  bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;
    1.74 +  int x_width = odd_pic_x ? pic_width - 1 : pic_width;
    1.75 +
    1.76 +  for (int y = pic_y; y < pic_height + pic_y; ++y) {
    1.77 +    uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
    1.78 +    const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
    1.79 +    const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
    1.80 +    const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
    1.81 +
    1.82 +    if (odd_pic_x) {
    1.83 +      // Handle the single odd pixel manually and use the
    1.84 +      // fast routines for the remaining.
    1.85 +      FastConvertYUVToRGB32Row_C(y_ptr++,
    1.86 +                                 u_ptr++,
    1.87 +                                 v_ptr++,
    1.88 +                                 rgb_row,
    1.89 +                                 1,
    1.90 +                                 x_shift);
    1.91 +      rgb_row += 4;
    1.92 +    }
    1.93 +
    1.94 +    if (has_sse) {
    1.95 +      FastConvertYUVToRGB32Row(y_ptr,
    1.96 +                               u_ptr,
    1.97 +                               v_ptr,
    1.98 +                               rgb_row,
    1.99 +                               x_width);
   1.100 +    }
   1.101 +    else {
   1.102 +      FastConvertYUVToRGB32Row_C(y_ptr,
   1.103 +                                 u_ptr,
   1.104 +                                 v_ptr,
   1.105 +                                 rgb_row,
   1.106 +                                 x_width,
   1.107 +                                 x_shift);
   1.108 +    }
   1.109 +  }
   1.110 +
   1.111 +  // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
   1.112 +  if (has_sse)
   1.113 +    EMMS();
   1.114 +}
   1.115 +
   1.116 +// C version does 8 at a time to mimic MMX code
   1.117 +static void FilterRows_C(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
   1.118 +                         int source_width, int source_y_fraction) {
   1.119 +  int y1_fraction = source_y_fraction;
   1.120 +  int y0_fraction = 256 - y1_fraction;
   1.121 +  uint8* end = ybuf + source_width;
   1.122 +  do {
   1.123 +    ybuf[0] = (y0_ptr[0] * y0_fraction + y1_ptr[0] * y1_fraction) >> 8;
   1.124 +    ybuf[1] = (y0_ptr[1] * y0_fraction + y1_ptr[1] * y1_fraction) >> 8;
   1.125 +    ybuf[2] = (y0_ptr[2] * y0_fraction + y1_ptr[2] * y1_fraction) >> 8;
   1.126 +    ybuf[3] = (y0_ptr[3] * y0_fraction + y1_ptr[3] * y1_fraction) >> 8;
   1.127 +    ybuf[4] = (y0_ptr[4] * y0_fraction + y1_ptr[4] * y1_fraction) >> 8;
   1.128 +    ybuf[5] = (y0_ptr[5] * y0_fraction + y1_ptr[5] * y1_fraction) >> 8;
   1.129 +    ybuf[6] = (y0_ptr[6] * y0_fraction + y1_ptr[6] * y1_fraction) >> 8;
   1.130 +    ybuf[7] = (y0_ptr[7] * y0_fraction + y1_ptr[7] * y1_fraction) >> 8;
   1.131 +    y0_ptr += 8;
   1.132 +    y1_ptr += 8;
   1.133 +    ybuf += 8;
   1.134 +  } while (ybuf < end);
   1.135 +}
   1.136 +
// Declarations of the SIMD row blenders that FilterRows() dispatches to.
// They take the same arguments as FilterRows_C(); their definitions are not
// part of this section of the file.

// MMX variant, declared only when MOZILLA_MAY_SUPPORT_MMX is defined.
#ifdef MOZILLA_MAY_SUPPORT_MMX
void FilterRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
                    int source_width, int source_y_fraction);
#endif

// SSE2 variant, declared only when MOZILLA_MAY_SUPPORT_SSE2 is defined.
#ifdef MOZILLA_MAY_SUPPORT_SSE2
void FilterRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
                     int source_width, int source_y_fraction);
#endif
   1.146 +
   1.147 +static inline void FilterRows(uint8* ybuf, const uint8* y0_ptr,
   1.148 +                              const uint8* y1_ptr, int source_width,
   1.149 +                              int source_y_fraction) {
   1.150 +#ifdef MOZILLA_MAY_SUPPORT_SSE2
   1.151 +  if (mozilla::supports_sse2()) {
   1.152 +    FilterRows_SSE2(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
   1.153 +    return;
   1.154 +  }
   1.155 +#endif
   1.156 +
   1.157 +#ifdef MOZILLA_MAY_SUPPORT_MMX
   1.158 +  if (mozilla::supports_mmx()) {
   1.159 +    FilterRows_MMX(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
   1.160 +    return;
   1.161 +  }
   1.162 +#endif
   1.163 +
   1.164 +  FilterRows_C(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
   1.165 +}
   1.166 +
   1.167 +
// Scale a frame of YUV to 32 bit ARGB.
//
// Reads a source_width x source_height YUV frame and writes a width x height
// image to rgb_buf, one destination row every rgb_pitch bytes.
//
//   y_buf, u_buf, v_buf - source planes; consecutive rows are y_pitch (luma)
//                         and uv_pitch (chroma) bytes apart.
//   yuv_type            - only YV12 is distinguished here: it halves the
//                         chroma row index (y_shift).
//   view_rotate         - rotation / mirroring applied while sampling the
//                         source.
//   filter              - FILTER_BILINEAR_V / FILTER_BILINEAR_H bits. Forced
//                         to FILTER_NONE when the source is wider than the
//                         row buffers or view_rotate is non-zero.
//
// Returns without writing anything when width or height is 0.
NS_GFX_(void) ScaleYCbCrToRGB32(const uint8* y_buf,
                                const uint8* u_buf,
                                const uint8* v_buf,
                                uint8* rgb_buf,
                                int source_width,
                                int source_height,
                                int width,
                                int height,
                                int y_pitch,
                                int uv_pitch,
                                int rgb_pitch,
                                YUVType yuv_type,
                                Rotate view_rotate,
                                ScaleFilter filter) {
  // Remembered so that EMMS() can be issued once at the end.
  bool has_mmx = supports_mmx();

  // 4096 allows 3 buffers to fit in 12k.
  // Helps performance on CPU with 16K L1 cache.
  // Large enough for 3830x2160 and 30" displays which are 2560x1600.
  const int kFilterBufferSize = 4096;
  // Disable filtering if the screen is too big (to avoid buffer overflows).
  // This should never happen to regular users: they don't have monitors
  // wider than 4096 pixels.
  // TODO(fbarchard): Allow rotated videos to filter.
  if (source_width > kFilterBufferSize || view_rotate)
    filter = FILTER_NONE;

  // YV12 has one chroma row for every two luma rows.
  unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
  // Diagram showing origin and direction of source sampling.
  // ->0   4<-
  // 7       3
  //
  // 6       5
  // ->1   2<-
  // Rotations that start at right side of image.
  // The plane pointers move to the last sample of the first row and the
  // width is negated, which makes the horizontal step computed below negative.
  // NOTE(review): the chroma offset uses source_width / 2 for every yuv_type;
  // confirm this is intended for YV24.
  if ((view_rotate == ROTATE_180) ||
      (view_rotate == ROTATE_270) ||
      (view_rotate == MIRROR_ROTATE_0) ||
      (view_rotate == MIRROR_ROTATE_90)) {
    y_buf += source_width - 1;
    u_buf += source_width / 2 - 1;
    v_buf += source_width / 2 - 1;
    source_width = -source_width;
  }
  // Rotations that start at bottom of image.
  // The plane pointers move to the last row and the height is negated, which
  // makes the vertical step computed below negative.
  if ((view_rotate == ROTATE_90) ||
      (view_rotate == ROTATE_180) ||
      (view_rotate == MIRROR_ROTATE_90) ||
      (view_rotate == MIRROR_ROTATE_180)) {
    y_buf += (source_height - 1) * y_pitch;
    u_buf += ((source_height >> y_shift) - 1) * uv_pitch;
    v_buf += ((source_height >> y_shift) - 1) * uv_pitch;
    source_height = -source_height;
  }

  // Handle zero sized destination.
  if (width == 0 || height == 0)
    return;
  // Source step per destination pixel (dx) and per destination row (dy), in
  // 16.16 fixed point.
  int source_dx = source_width * kFractionMax / width;
  int source_dy = source_height * kFractionMax / height;
  int source_dx_uv = source_dx;

  // Quarter-turn rotations: destination and source extents are swapped.
  // Moving along a destination row now advances by whole source rows (the
  // integer part of the original dy times the pitch), and moving to the next
  // destination row advances by one source column (pitch of +1 or -1).
  if ((view_rotate == ROTATE_90) ||
      (view_rotate == ROTATE_270)) {
    int tmp = height;
    height = width;
    width = tmp;
    tmp = source_height;
    source_height = source_width;
    source_width = tmp;
    int original_dx = source_dx;
    int original_dy = source_dy;
    source_dx = ((original_dy >> kFractionBits) * y_pitch) << kFractionBits;
    source_dx_uv = ((original_dy >> kFractionBits) * uv_pitch) << kFractionBits;
    source_dy = original_dx;
    if (view_rotate == ROTATE_90) {
      y_pitch = -1;
      uv_pitch = -1;
      source_height = -source_height;
    } else {
      y_pitch = 1;
      uv_pitch = 1;
    }
  }

  // Need padding because FilterRows() will write 1 to 16 extra pixels
  // after the end for SSE2 version.
  // yuvbuf holds three consecutive kFilterBufferSize-byte rows (Y, U, V);
  // ybuf is yuvbuf rounded up to a 16 byte boundary.
  uint8 yuvbuf[16 + kFilterBufferSize * 3 + 16];
  uint8* ybuf =
      reinterpret_cast<uint8*>(reinterpret_cast<uintptr_t>(yuvbuf + 15) & ~15);
  uint8* ubuf = ybuf + kFilterBufferSize;
  uint8* vbuf = ubuf + kFilterBufferSize;
  // TODO(fbarchard): Fixed point math is off by 1 on negatives.
  // Vertical source step per destination row, in 16.16 fixed point.
  int yscale_fixed = (source_height << kFractionBits) / height;

  // TODO(fbarchard): Split this into separate function for better efficiency.
  for (int y = 0; y < height; ++y) {
    uint8* dest_pixel = rgb_buf + y * rgb_pitch;
    // Source row position for this destination row, in 16.16 fixed point.
    int source_y_subpixel = (y * yscale_fixed);
    if (yscale_fixed >= (kFractionMax * 2)) {
      source_y_subpixel += kFractionMax / 2;  // For 1/2 or less, center filter.
    }
    int source_y = source_y_subpixel >> kFractionBits;

    // Row pairs (current row and the one a pitch further on) that the
    // vertical filter blends.
    const uint8* y0_ptr = y_buf + source_y * y_pitch;
    const uint8* y1_ptr = y0_ptr + y_pitch;

    const uint8* u0_ptr = u_buf + (source_y >> y_shift) * uv_pitch;
    const uint8* u1_ptr = u0_ptr + uv_pitch;
    const uint8* v0_ptr = v_buf + (source_y >> y_shift) * uv_pitch;
    const uint8* v1_ptr = v0_ptr + uv_pitch;

    // vertical scaler uses 16.8 fixed point
    // Top 8 bits of the 16 bit fraction; the chroma fraction is taken from
    // the row position shifted down by y_shift.
    int source_y_fraction = (source_y_subpixel & kFractionMask) >> 8;
    int source_uv_fraction =
        ((source_y_subpixel >> y_shift) & kFractionMask) >> 8;

    // Without vertical filtering the row routines read the source directly.
    const uint8* y_ptr = y0_ptr;
    const uint8* u_ptr = u0_ptr;
    const uint8* v_ptr = v0_ptr;
    // Apply vertical filtering if necessary.
    // TODO(fbarchard): Remove memcpy when not necessary.
    if (filter & mozilla::gfx::FILTER_BILINEAR_V) {
      // Blend only when the frame is scaled vertically, the row falls between
      // two source rows, and a following source row exists; otherwise copy
      // the row unchanged.
      if (yscale_fixed != kFractionMax &&
          source_y_fraction && ((source_y + 1) < source_height)) {
        FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
      } else {
        memcpy(ybuf, y0_ptr, source_width);
      }
      y_ptr = ybuf;
      // Duplicate the last sample one element past the end of the row
      // (presumably so a horizontal filter can read one sample beyond it).
      // NOTE(review): when source_width == kFilterBufferSize this index is
      // ubuf[0], which the chroma fill below then overwrites - confirm.
      ybuf[source_width] = ybuf[source_width-1];
      // Chroma row width is half the luma width, rounded up.
      int uv_source_width = (source_width + 1) / 2;
      if (yscale_fixed != kFractionMax &&
          source_uv_fraction &&
          (((source_y >> y_shift) + 1) < (source_height >> y_shift))) {
        FilterRows(ubuf, u0_ptr, u1_ptr, uv_source_width, source_uv_fraction);
        FilterRows(vbuf, v0_ptr, v1_ptr, uv_source_width, source_uv_fraction);
      } else {
        memcpy(ubuf, u0_ptr, uv_source_width);
        memcpy(vbuf, v0_ptr, uv_source_width);
      }
      u_ptr = ubuf;
      v_ptr = vbuf;
      // Same one-past-the-end duplication for the chroma rows.
      ubuf[uv_source_width] = ubuf[uv_source_width - 1];
      vbuf[uv_source_width] = vbuf[uv_source_width - 1];
    }
    // Pick the row routine from the horizontal step and the filter flags.
    if (source_dx == kFractionMax) {  // Not scaled
      FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
                               dest_pixel, width);
    } else if (filter & FILTER_BILINEAR_H) {
        LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
                                 dest_pixel, width, source_dx);
    } else {
// Specialized scalers and rotation.
// This branch is compiled only when MOZILLA_MAY_SUPPORT_SSE, _MSC_VER and
// _M_IX86 are all defined.
#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_MSC_VER) && defined(_M_IX86)
      if(mozilla::supports_sse()) {
        if (width == (source_width * 2)) {
          // Destination is exactly twice as wide as the source.
          DoubleYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,
                                  dest_pixel, width);
        } else if ((source_dx & kFractionMask) == 0) {
          // Scaling by integer scale factor. ie half.
          ConvertYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,
                                   dest_pixel, width,
                                   source_dx >> kFractionBits);
        } else if (source_dx_uv == source_dx) {  // Not rotated.
          ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
                             dest_pixel, width, source_dx);
        } else {
          // Luma and chroma use different integer steps (set up by the
          // quarter-turn rotation code above).
          RotateConvertYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr,
                                         dest_pixel, width,
                                         source_dx >> kFractionBits,
                                         source_dx_uv >> kFractionBits);
        }
      }
      else {
        ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
                             dest_pixel, width, source_dx);
      }
#else
      // source_dx_uv is not otherwise used in this configuration; the cast
      // presumably silences an unused-variable warning.
      (void)source_dx_uv;
      ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
                         dest_pixel, width, source_dx);
#endif
    }
  }
  // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms.
  if (has_mmx)
    EMMS();
}
   1.358 +
   1.359 +}  // namespace gfx
   1.360 +}  // namespace mozilla

mercurial