gfx/ycbcr/ycbcr_to_rgb565.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/ycbcr/ycbcr_to_rgb565.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,669 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +#include <stdlib.h>
    1.10 +#include <limits.h>
    1.11 +#include "nsDebug.h"
    1.12 +#include "ycbcr_to_rgb565.h"
    1.13 +#include "nsAlgorithm.h"
    1.14 +
    1.15 +
    1.16 +
    1.17 +#ifdef HAVE_YCBCR_TO_RGB565
    1.18 +
    1.19 +namespace mozilla {
    1.20 +
    1.21 +namespace gfx {
    1.22 +
    1.23 +/*This contains all of the parameters that are needed to convert a row.
    1.24 +  Passing them in a struct instead of as individual parameters saves the need
    1.25 +   to continually push onto the stack the ones that are fixed for every row.*/
    1.26 +struct yuv2rgb565_row_scale_bilinear_ctx{
    1.27 +  uint16_t *rgb_row;
    1.28 +  const uint8_t *y_row;
    1.29 +  const uint8_t *u_row;
    1.30 +  const uint8_t *v_row;
    1.31 +  int y_yweight;
    1.32 +  int y_pitch;
    1.33 +  int width;
    1.34 +  int source_x0_q16;
    1.35 +  int source_dx_q16;
    1.36 +  /*Not used for 4:4:4, except with chroma-nearest.*/
    1.37 +  int source_uv_xoffs_q16;
    1.38 +  /*Not used for 4:4:4 or chroma-nearest.*/
    1.39 +  int uv_pitch;
    1.40 +  /*Not used for 4:2:2, 4:4:4, or chroma-nearest.*/
    1.41 +  int uv_yweight;
    1.42 +};
    1.43 +
    1.44 +
    1.45 +
    1.46 +/*This contains all of the parameters that are needed to convert a row.
    1.47 +  Passing them in a struct instead of as individual parameters saves the need
    1.48 +   to continually push onto the stack the ones that are fixed for every row.*/
    1.49 +struct yuv2rgb565_row_scale_nearest_ctx{
    1.50 +  uint16_t *rgb_row;
    1.51 +  const uint8_t *y_row;
    1.52 +  const uint8_t *u_row;
    1.53 +  const uint8_t *v_row;
    1.54 +  int width;
    1.55 +  int source_x0_q16;
    1.56 +  int source_dx_q16;
    1.57 +  /*Not used for 4:4:4.*/
    1.58 +  int source_uv_xoffs_q16;
    1.59 +};
    1.60 +
    1.61 +
    1.62 +
    1.63 +typedef void (*yuv2rgb565_row_scale_bilinear_func)(
    1.64 + const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither);
    1.65 +
    1.66 +typedef void (*yuv2rgb565_row_scale_nearest_func)(
    1.67 + const yuv2rgb565_row_scale_nearest_ctx *ctx, int dither);
    1.68 +
    1.69 +
    1.70 +
    1.71 +# if defined(MOZILLA_MAY_SUPPORT_NEON)
    1.72 +
    1.73 +extern "C" void ScaleYCbCr42xToRGB565_BilinearY_Row_NEON(
    1.74 + const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither);
    1.75 +
    1.76 +void __attribute((noinline)) yuv42x_to_rgb565_row_neon(uint16 *dst,
    1.77 +                                                       const uint8 *y,
    1.78 +                                                       const uint8 *u,
    1.79 +                                                       const uint8 *v,
    1.80 +                                                       int n,
    1.81 +                                                       int oddflag);
    1.82 +
    1.83 +#endif
    1.84 +
    1.85 +
    1.86 +
    1.87 +/*Bilinear interpolation of a single value.
    1.88 +  This uses the exact same formulas as the asm, even though it adds some extra
    1.89 +   shifts that do nothing but reduce accuracy.*/
    1.90 +static int bislerp(const uint8_t *row,
    1.91 +                   int pitch,
    1.92 +                   int source_x,
    1.93 +                   int xweight,
    1.94 +                   int yweight) {
    1.95 +  int a;
    1.96 +  int b;
    1.97 +  int c;
    1.98 +  int d;
    1.99 +  a = row[source_x];
   1.100 +  b = row[source_x+1];
   1.101 +  c = row[source_x+pitch];
   1.102 +  d = row[source_x+pitch+1];
   1.103 +  a = ((a<<8)+(c-a)*yweight+128)>>8;
   1.104 +  b = ((b<<8)+(d-b)*yweight+128)>>8;
   1.105 +  return ((a<<8)+(b-a)*xweight+128)>>8;
   1.106 +}
   1.107 +
   1.108 +/*Convert a single pixel from Y'CbCr to RGB565.
   1.109 +  This uses the exact same formulas as the asm, even though we could make the
   1.110 +   constants a lot more accurate with 32-bit wide registers.*/
   1.111 +static uint16_t yu2rgb565(int y, int u, int v, int dither) {
   1.112 +  /*This combines the constant offset that needs to be added during the Y'CbCr
   1.113 +     conversion with a rounding offset that depends on the dither parameter.*/
   1.114 +  static const int DITHER_BIAS[4][3]={
   1.115 +    {-14240,    8704,    -17696},
   1.116 +    {-14240+128,8704+64, -17696+128},
   1.117 +    {-14240+256,8704+128,-17696+256},
   1.118 +    {-14240+384,8704+192,-17696+384}
   1.119 +  };
   1.120 +  int r;
   1.121 +  int g;
   1.122 +  int b;
   1.123 +  r = clamped((74*y+102*v+DITHER_BIAS[dither][0])>>9, 0, 31);
   1.124 +  g = clamped((74*y-25*u-52*v+DITHER_BIAS[dither][1])>>8, 0, 63);
   1.125 +  b = clamped((74*y+129*u+DITHER_BIAS[dither][2])>>9, 0, 31);
   1.126 +  return (uint16_t)(r<<11 | g<<5 | b);
   1.127 +}
   1.128 +
   1.129 +static void ScaleYCbCr420ToRGB565_Bilinear_Row_C(
   1.130 + const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither){
   1.131 +  int x;
   1.132 +  int source_x_q16;
   1.133 +  source_x_q16 = ctx->source_x0_q16;
   1.134 +  for (x = 0; x < ctx->width; x++) {
   1.135 +    int source_x;
   1.136 +    int xweight;
   1.137 +    int y;
   1.138 +    int u;
   1.139 +    int v;
   1.140 +    xweight = ((source_x_q16&0xFFFF)+128)>>8;
   1.141 +    source_x = source_x_q16>>16;
   1.142 +    y = bislerp(ctx->y_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
   1.143 +    xweight = (((source_x_q16+ctx->source_uv_xoffs_q16)&0x1FFFF)+256)>>9;
   1.144 +    source_x = (source_x_q16+ctx->source_uv_xoffs_q16)>>17;
   1.145 +    source_x_q16 += ctx->source_dx_q16;
   1.146 +    u = bislerp(ctx->u_row, ctx->uv_pitch, source_x, xweight, ctx->uv_yweight);
   1.147 +    v = bislerp(ctx->v_row, ctx->uv_pitch, source_x, xweight, ctx->uv_yweight);
   1.148 +    ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
   1.149 +    dither ^= 3;
   1.150 +  }
   1.151 +}
   1.152 +
   1.153 +static void ScaleYCbCr422ToRGB565_Bilinear_Row_C(
   1.154 + const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither){
   1.155 +  int x;
   1.156 +  int source_x_q16;
   1.157 +  source_x_q16 = ctx->source_x0_q16;
   1.158 +  for (x = 0; x < ctx->width; x++) {
   1.159 +    int source_x;
   1.160 +    int xweight;
   1.161 +    int y;
   1.162 +    int u;
   1.163 +    int v;
   1.164 +    xweight = ((source_x_q16&0xFFFF)+128)>>8;
   1.165 +    source_x = source_x_q16>>16;
   1.166 +    y = bislerp(ctx->y_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
   1.167 +    xweight = (((source_x_q16+ctx->source_uv_xoffs_q16)&0x1FFFF)+256)>>9;
   1.168 +    source_x = (source_x_q16+ctx->source_uv_xoffs_q16)>>17;
   1.169 +    source_x_q16 += ctx->source_dx_q16;
   1.170 +    u = bislerp(ctx->u_row, ctx->uv_pitch, source_x, xweight, ctx->y_yweight);
   1.171 +    v = bislerp(ctx->v_row, ctx->uv_pitch, source_x, xweight, ctx->y_yweight);
   1.172 +    ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
   1.173 +    dither ^= 3;
   1.174 +  }
   1.175 +}
   1.176 +
   1.177 +static void ScaleYCbCr444ToRGB565_Bilinear_Row_C(
   1.178 + const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither){
   1.179 +  int x;
   1.180 +  int source_x_q16;
   1.181 +  source_x_q16 = ctx->source_x0_q16;
   1.182 +  for (x = 0; x < ctx->width; x++) {
   1.183 +    int source_x;
   1.184 +    int xweight;
   1.185 +    int y;
   1.186 +    int u;
   1.187 +    int v;
   1.188 +    xweight = ((source_x_q16&0xFFFF)+128)>>8;
   1.189 +    source_x = source_x_q16>>16;
   1.190 +    source_x_q16 += ctx->source_dx_q16;
   1.191 +    y = bislerp(ctx->y_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
   1.192 +    u = bislerp(ctx->u_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
   1.193 +    v = bislerp(ctx->v_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
   1.194 +    ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
   1.195 +    dither ^= 3;
   1.196 +  }
   1.197 +}
   1.198 +
   1.199 +static void ScaleYCbCr42xToRGB565_BilinearY_Row_C(
   1.200 + const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither){
   1.201 +  int x;
   1.202 +  int source_x_q16;
   1.203 +  source_x_q16 = ctx->source_x0_q16;
   1.204 +  for (x = 0; x < ctx->width; x++) {
   1.205 +    int source_x;
   1.206 +    int xweight;
   1.207 +    int y;
   1.208 +    int u;
   1.209 +    int v;
   1.210 +    xweight = ((source_x_q16&0xFFFF)+128)>>8;
   1.211 +    source_x = source_x_q16>>16;
   1.212 +    y = bislerp(ctx->y_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
   1.213 +    source_x = (source_x_q16+ctx->source_uv_xoffs_q16)>>17;
   1.214 +    source_x_q16 += ctx->source_dx_q16;
   1.215 +    u = ctx->u_row[source_x];
   1.216 +    v = ctx->v_row[source_x];
   1.217 +    ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
   1.218 +    dither ^= 3;
   1.219 +  }
   1.220 +}
   1.221 +
   1.222 +static void ScaleYCbCr444ToRGB565_BilinearY_Row_C(
   1.223 + const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither){
   1.224 +  int x;
   1.225 +  int source_x_q16;
   1.226 +  source_x_q16 = ctx->source_x0_q16;
   1.227 +  for (x = 0; x < ctx->width; x++) {
   1.228 +    int source_x;
   1.229 +    int xweight;
   1.230 +    int y;
   1.231 +    int u;
   1.232 +    int v;
   1.233 +    xweight = ((source_x_q16&0xFFFF)+128)>>8;
   1.234 +    source_x = source_x_q16>>16;
   1.235 +    y = bislerp(ctx->y_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
   1.236 +    source_x = (source_x_q16+ctx->source_uv_xoffs_q16)>>16;
   1.237 +    source_x_q16 += ctx->source_dx_q16;
   1.238 +    u = ctx->u_row[source_x];
   1.239 +    v = ctx->v_row[source_x];
   1.240 +    ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
   1.241 +    dither ^= 3;
   1.242 +  }
   1.243 +}
   1.244 +
   1.245 +static void ScaleYCbCr42xToRGB565_Nearest_Row_C(
   1.246 + const yuv2rgb565_row_scale_nearest_ctx *ctx, int dither){
   1.247 +  int y;
   1.248 +  int u;
   1.249 +  int v;
   1.250 +  int x;
   1.251 +  int source_x_q16;
   1.252 +  int source_x;
   1.253 +  source_x_q16 = ctx->source_x0_q16;
   1.254 +  for (x = 0; x < ctx->width; x++) {
   1.255 +    source_x = source_x_q16>>16;
   1.256 +    y = ctx->y_row[source_x];
   1.257 +    source_x = (source_x_q16+ctx->source_uv_xoffs_q16)>>17;
   1.258 +    source_x_q16 += ctx->source_dx_q16;
   1.259 +    u = ctx->u_row[source_x];
   1.260 +    v = ctx->v_row[source_x];
   1.261 +    ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
   1.262 +    dither ^= 3;
   1.263 +  }
   1.264 +}
   1.265 +
   1.266 +static void ScaleYCbCr444ToRGB565_Nearest_Row_C(
   1.267 + const yuv2rgb565_row_scale_nearest_ctx *ctx, int dither){
   1.268 +  int y;
   1.269 +  int u;
   1.270 +  int v;
   1.271 +  int x;
   1.272 +  int source_x_q16;
   1.273 +  int source_x;
   1.274 +  source_x_q16 = ctx->source_x0_q16;
   1.275 +  for (x = 0; x < ctx->width; x++) {
   1.276 +    source_x = source_x_q16>>16;
   1.277 +    source_x_q16 += ctx->source_dx_q16;
   1.278 +    y = ctx->y_row[source_x];
   1.279 +    u = ctx->u_row[source_x];
   1.280 +    v = ctx->v_row[source_x];
   1.281 +    ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
   1.282 +    dither ^= 3;
   1.283 +  }
   1.284 +}
   1.285 +
   1.286 +NS_GFX_(void) ScaleYCbCrToRGB565(const uint8_t *y_buf,
   1.287 +                                 const uint8_t *u_buf,
   1.288 +                                 const uint8_t *v_buf,
   1.289 +                                 uint8_t *rgb_buf,
   1.290 +                                 int source_x0,
   1.291 +                                 int source_y0,
   1.292 +                                 int source_width,
   1.293 +                                 int source_height,
   1.294 +                                 int width,
   1.295 +                                 int height,
   1.296 +                                 int y_pitch,
   1.297 +                                 int uv_pitch,
   1.298 +                                 int rgb_pitch,
   1.299 +                                 YUVType yuv_type,
   1.300 +                                 ScaleFilter filter) {
   1.301 +  int source_x0_q16;
   1.302 +  int source_y0_q16;
   1.303 +  int source_dx_q16;
   1.304 +  int source_dy_q16;
   1.305 +  int source_uv_xoffs_q16;
   1.306 +  int source_uv_yoffs_q16;
   1.307 +  int x_shift;
   1.308 +  int y_shift;
   1.309 +  int ymin;
   1.310 +  int ymax;
   1.311 +  int uvmin;
   1.312 +  int uvmax;
   1.313 +  int dither;
   1.314 +  /*We don't support negative destination rectangles (just flip the source
   1.315 +     instead), and for empty ones there's nothing to do.*/
   1.316 +  if (width <= 0 || height <= 0)
   1.317 +    return;
   1.318 +  /*These bounds are required to avoid 16.16 fixed-point overflow.*/
   1.319 +  NS_ASSERTION(source_x0 > (INT_MIN>>16) && source_x0 < (INT_MAX>>16),
   1.320 +    "ScaleYCbCrToRGB565 source X offset out of bounds.");
   1.321 +  NS_ASSERTION(source_x0+source_width > (INT_MIN>>16)
   1.322 +            && source_x0+source_width < (INT_MAX>>16),
   1.323 +    "ScaleYCbCrToRGB565 source width out of bounds.");
   1.324 +  NS_ASSERTION(source_y0 > (INT_MIN>>16) && source_y0 < (INT_MAX>>16),
   1.325 +    "ScaleYCbCrToRGB565 source Y offset out of bounds.");
   1.326 +  NS_ASSERTION(source_y0+source_height > (INT_MIN>>16)
   1.327 +            && source_y0+source_height < (INT_MAX>>16),
   1.328 +    "ScaleYCbCrToRGB565 source height out of bounds.");
   1.329 +  /*We require the same stride for Y' and Cb and Cr for 4:4:4 content.*/
   1.330 +  NS_ASSERTION(yuv_type != YV24 || y_pitch == uv_pitch,
   1.331 +    "ScaleYCbCrToRGB565 luma stride differs from chroma for 4:4:4 content.");
   1.332 +  /*We assume we can read outside the bounds of the input, because it makes
   1.333 +     the code much simpler (and in practice is true: both Theora and VP8 return
   1.334 +     padded reference frames).
   1.335 +    In practice, we do not even _have_ the actual bounds of the source, as
   1.336 +     we are passed a crop rectangle from it, and not the dimensions of the full
   1.337 +     image.
   1.338 +    This assertion will not guarantee our out-of-bounds reads are safe, but it
   1.339 +     should at least catch the simple case of passing in an unpadded buffer.*/
   1.340 +  NS_ASSERTION(abs(y_pitch) >= abs(source_width)+16,
   1.341 +    "ScaleYCbCrToRGB565 source image unpadded?");
   1.342 +  /*The NEON code requires the pointers to be aligned to a 16-byte boundary at
   1.343 +     the start of each row.
   1.344 +    This should be true for all of our sources.
   1.345 +    We could try to fix this up if it's not true by adjusting source_x0, but
   1.346 +     that would require the mis-alignment to be the same for the U and V
   1.347 +     planes.*/
   1.348 +  NS_ASSERTION((y_pitch&15) == 0 && (uv_pitch&15) == 0 &&
   1.349 +   ((y_buf-(uint8_t *)nullptr)&15) == 0 &&
   1.350 +   ((u_buf-(uint8_t *)nullptr)&15) == 0 &&
   1.351 +   ((v_buf-(uint8_t *)nullptr)&15) == 0,
   1.352 +   "ScaleYCbCrToRGB565 source image unaligned");
   1.353 +  /*We take an area-based approach to pixel coverage to avoid shifting by small
   1.354 +     amounts (or not so small, when up-scaling or down-scaling by a large
   1.355 +     factor).
   1.356 +
   1.357 +    An illustrative example: scaling 4:2:0 up by 2, using JPEG chroma cositing^.
   1.358 +
   1.359 +    + = RGB destination locations
   1.360 +    * = Y' source locations
   1.361 +    - = Cb, Cr source locations
   1.362 +
   1.363 +    +   +   +   +  +   +   +   +
   1.364 +      *       *      *       *
   1.365 +    +   +   +   +  +   +   +   +
   1.366 +          -              -
   1.367 +    +   +   +   +  +   +   +   +
   1.368 +      *       *      *       *
   1.369 +    +   +   +   +  +   +   +   +
   1.370 +
   1.371 +    +   +   +   +  +   +   +   +
   1.372 +      *       *      *       *
   1.373 +    +   +   +   +  +   +   +   +
   1.374 +          -              -
   1.375 +    +   +   +   +  +   +   +   +
   1.376 +      *       *      *       *
   1.377 +    +   +   +   +  +   +   +   +
   1.378 +
   1.379 +    So, the coordinates of the upper-left + (first destination site) should
   1.380 +     be (-0.25,-0.25) in the source Y' coordinate system.
   1.381 +    Similarly, the coordinates should be (-0.375,-0.375) in the source Cb, Cr
   1.382 +     coordinate system.
   1.383 +    Note that the origin and scale of these two coordinate systems is not the
   1.384 +     same!
   1.385 +
   1.386 +    ^JPEG cositing is required for Theora; VP8 doesn't specify cositing rules,
   1.387 +     but nearly all software converters in existence (at least those that are
   1.388 +     open source, and many that are not) use JPEG cositing instead of MPEG.*/
   1.389 +  source_dx_q16 = (source_width<<16) / width;
   1.390 +  source_x0_q16 = (source_x0<<16)+(source_dx_q16>>1)-0x8000;
   1.391 +  source_dy_q16 = (source_height<<16) / height;
   1.392 +  source_y0_q16 = (source_y0<<16)+(source_dy_q16>>1)-0x8000;
   1.393 +  x_shift = (yuv_type != YV24);
   1.394 +  y_shift = (yuv_type == YV12);
   1.395 +  /*These two variables hold the difference between the origins of the Y' and
   1.396 +     the Cb, Cr coordinate systems, using the scale of the Y' coordinate
   1.397 +     system.*/
   1.398 +  source_uv_xoffs_q16 = -(x_shift<<15);
   1.399 +  source_uv_yoffs_q16 = -(y_shift<<15);
   1.400 +  /*Compute the range of source rows we'll actually use.
   1.401 +    This doesn't guarantee we won't read outside this range.*/
   1.402 +  ymin = source_height >= 0 ? source_y0 : source_y0+source_height-1;
   1.403 +  ymax = source_height >= 0 ? source_y0+source_height-1 : source_y0;
   1.404 +  uvmin = ymin>>y_shift;
   1.405 +  uvmax = ((ymax+1+y_shift)>>y_shift)-1;
   1.406 +  /*Pick a dithering pattern.
   1.407 +    The "&3" at the end is just in case RAND_MAX is lying.*/
   1.408 +  dither = (rand()/(RAND_MAX>>2))&3;
   1.409 +  /*Nearest-neighbor scaling.*/
   1.410 +  if (filter == FILTER_NONE) {
   1.411 +    yuv2rgb565_row_scale_nearest_ctx ctx;
   1.412 +    yuv2rgb565_row_scale_nearest_func scale_row;
   1.413 +    int y;
   1.414 +    /*Add rounding offsets once, in advance.*/
   1.415 +    source_x0_q16 += 0x8000;
   1.416 +    source_y0_q16 += 0x8000;
   1.417 +    source_uv_xoffs_q16 += (x_shift<<15);
   1.418 +    source_uv_yoffs_q16 += (y_shift<<15);
   1.419 +    if (yuv_type == YV12)
   1.420 +      scale_row = ScaleYCbCr42xToRGB565_Nearest_Row_C;
   1.421 +    else
   1.422 +      scale_row = ScaleYCbCr444ToRGB565_Nearest_Row_C;
   1.423 +    ctx.width = width;
   1.424 +    ctx.source_x0_q16 = source_x0_q16;
   1.425 +    ctx.source_dx_q16 = source_dx_q16;
   1.426 +    ctx.source_uv_xoffs_q16 = source_uv_xoffs_q16;
   1.427 +    for (y=0; y<height; y++) {
   1.428 +      int source_y;
   1.429 +      ctx.rgb_row = (uint16_t *)(rgb_buf + y*rgb_pitch);
   1.430 +      source_y = source_y0_q16>>16;
   1.431 +      source_y = clamped(source_y, ymin, ymax);
   1.432 +      ctx.y_row = y_buf + source_y*y_pitch;
   1.433 +      source_y = (source_y0_q16+source_uv_yoffs_q16)>>(16+y_shift);
   1.434 +      source_y = clamped(source_y, uvmin, uvmax);
   1.435 +      source_y0_q16 += source_dy_q16;
   1.436 +      ctx.u_row = u_buf + source_y*uv_pitch;
   1.437 +      ctx.v_row = v_buf + source_y*uv_pitch;
   1.438 +      (*scale_row)(&ctx, dither);
   1.439 +      dither ^= 2;
   1.440 +    }
   1.441 +  }
   1.442 +  /*Bilinear scaling.*/
   1.443 +  else {
   1.444 +    yuv2rgb565_row_scale_bilinear_ctx ctx;
   1.445 +    yuv2rgb565_row_scale_bilinear_func scale_row;
   1.446 +    int uvxscale_min;
   1.447 +    int uvxscale_max;
   1.448 +    int uvyscale_min;
   1.449 +    int uvyscale_max;
   1.450 +    int y;
   1.451 +    /*Check how close the chroma scaling is to unity.
   1.452 +      If it's close enough, we can get away with nearest-neighbor chroma
   1.453 +       sub-sampling, and only doing bilinear on luma.
   1.454 +      If a given axis is subsampled, we use bounds on the luma step of
   1.455 +       [0.67...2], which is equivalent to scaling chroma by [1...3].
   1.456 +      If it's not subsampled, we use bounds of [0.5...1.33], which is
   1.457 +       equivalent to scaling chroma by [0.75...2].
   1.458 +      The lower bound is chosen as a trade-off between speed and how terrible
   1.459 +       nearest neighbor looks when upscaling.*/
   1.460 +# define CHROMA_NEAREST_SUBSAMP_STEP_MIN  0xAAAA
   1.461 +# define CHROMA_NEAREST_NORMAL_STEP_MIN   0x8000
   1.462 +# define CHROMA_NEAREST_SUBSAMP_STEP_MAX 0x20000
   1.463 +# define CHROMA_NEAREST_NORMAL_STEP_MAX  0x15555
   1.464 +    uvxscale_min = yuv_type != YV24 ?
   1.465 +     CHROMA_NEAREST_SUBSAMP_STEP_MIN : CHROMA_NEAREST_NORMAL_STEP_MIN;
   1.466 +    uvxscale_max = yuv_type != YV24 ?
   1.467 +     CHROMA_NEAREST_SUBSAMP_STEP_MAX : CHROMA_NEAREST_NORMAL_STEP_MAX;
   1.468 +    uvyscale_min = yuv_type == YV12 ?
   1.469 +     CHROMA_NEAREST_SUBSAMP_STEP_MIN : CHROMA_NEAREST_NORMAL_STEP_MIN;
   1.470 +    uvyscale_max = yuv_type == YV12 ?
   1.471 +     CHROMA_NEAREST_SUBSAMP_STEP_MAX : CHROMA_NEAREST_NORMAL_STEP_MAX;
   1.472 +    if (uvxscale_min <= abs(source_dx_q16)
   1.473 +     && abs(source_dx_q16) <= uvxscale_max
   1.474 +     && uvyscale_min <= abs(source_dy_q16)
   1.475 +     && abs(source_dy_q16) <= uvyscale_max) {
   1.476 +      /*Add the rounding offsets now.*/
   1.477 +      source_uv_xoffs_q16 += 1<<(15+x_shift);
   1.478 +      source_uv_yoffs_q16 += 1<<(15+y_shift);
   1.479 +      if (yuv_type != YV24) {
   1.480 +        scale_row =
   1.481 +#  if defined(MOZILLA_MAY_SUPPORT_NEON)
   1.482 +         supports_neon() ? ScaleYCbCr42xToRGB565_BilinearY_Row_NEON :
   1.483 +#  endif
   1.484 +         ScaleYCbCr42xToRGB565_BilinearY_Row_C;
   1.485 +      }
   1.486 +      else
   1.487 +        scale_row = ScaleYCbCr444ToRGB565_BilinearY_Row_C;
   1.488 +    }
   1.489 +    else {
   1.490 +      if (yuv_type == YV12)
   1.491 +        scale_row = ScaleYCbCr420ToRGB565_Bilinear_Row_C;
   1.492 +      else if (yuv_type == YV16)
   1.493 +        scale_row = ScaleYCbCr422ToRGB565_Bilinear_Row_C;
   1.494 +      else
   1.495 +        scale_row = ScaleYCbCr444ToRGB565_Bilinear_Row_C;
   1.496 +    }
   1.497 +    ctx.width = width;
   1.498 +    ctx.y_pitch = y_pitch;
   1.499 +    ctx.source_x0_q16 = source_x0_q16;
   1.500 +    ctx.source_dx_q16 = source_dx_q16;
   1.501 +    ctx.source_uv_xoffs_q16 = source_uv_xoffs_q16;
   1.502 +    ctx.uv_pitch = uv_pitch;
   1.503 +    for (y=0; y<height; y++) {
   1.504 +      int source_y;
   1.505 +      int yweight;
   1.506 +      int uvweight;
   1.507 +      ctx.rgb_row = (uint16_t *)(rgb_buf + y*rgb_pitch);
   1.508 +      source_y = (source_y0_q16+128)>>16;
   1.509 +      yweight = ((source_y0_q16+128)>>8)&0xFF;
   1.510 +      if (source_y < ymin) {
   1.511 +        source_y = ymin;
   1.512 +        yweight = 0;
   1.513 +      }
   1.514 +      if (source_y > ymax) {
   1.515 +        source_y = ymax;
   1.516 +        yweight = 0;
   1.517 +      }
   1.518 +      ctx.y_row = y_buf + source_y*y_pitch;
   1.519 +      source_y = source_y0_q16+source_uv_yoffs_q16+(128<<y_shift);
   1.520 +      source_y0_q16 += source_dy_q16;
   1.521 +      uvweight = source_y>>(8+y_shift)&0xFF;
   1.522 +      source_y >>= 16+y_shift;
   1.523 +      if (source_y < uvmin) {
   1.524 +        source_y = uvmin;
   1.525 +        uvweight = 0;
   1.526 +      }
   1.527 +      if (source_y > uvmax) {
   1.528 +        source_y = uvmax;
   1.529 +        uvweight = 0;
   1.530 +      }
   1.531 +      ctx.u_row = u_buf + source_y*uv_pitch;
   1.532 +      ctx.v_row = v_buf + source_y*uv_pitch;
   1.533 +      ctx.y_yweight = yweight;
   1.534 +      ctx.uv_yweight = uvweight;
   1.535 +      (*scale_row)(&ctx, dither);
   1.536 +      dither ^= 2;
   1.537 +    }
   1.538 +  }
   1.539 +}
   1.540 +
   1.541 +NS_GFX_(bool) IsScaleYCbCrToRGB565Fast(int source_x0,
   1.542 +                                       int source_y0,
   1.543 +                                       int source_width,
   1.544 +                                       int source_height,
   1.545 +                                       int width,
   1.546 +                                       int height,
   1.547 +                                       YUVType yuv_type,
   1.548 +                                       ScaleFilter filter)
   1.549 +{
   1.550 +  // Very fast.
   1.551 +  if (width <= 0 || height <= 0)
   1.552 +    return true;
   1.553 +#  if defined(MOZILLA_MAY_SUPPORT_NEON)
   1.554 +  if (filter != FILTER_NONE) {
   1.555 +    int source_dx_q16;
   1.556 +    int source_dy_q16;
   1.557 +    int uvxscale_min;
   1.558 +    int uvxscale_max;
   1.559 +    int uvyscale_min;
   1.560 +    int uvyscale_max;
   1.561 +    source_dx_q16 = (source_width<<16) / width;
   1.562 +    source_dy_q16 = (source_height<<16) / height;
   1.563 +    uvxscale_min = yuv_type != YV24 ?
   1.564 +     CHROMA_NEAREST_SUBSAMP_STEP_MIN : CHROMA_NEAREST_NORMAL_STEP_MIN;
   1.565 +    uvxscale_max = yuv_type != YV24 ?
   1.566 +     CHROMA_NEAREST_SUBSAMP_STEP_MAX : CHROMA_NEAREST_NORMAL_STEP_MAX;
   1.567 +    uvyscale_min = yuv_type == YV12 ?
   1.568 +     CHROMA_NEAREST_SUBSAMP_STEP_MIN : CHROMA_NEAREST_NORMAL_STEP_MIN;
   1.569 +    uvyscale_max = yuv_type == YV12 ?
   1.570 +     CHROMA_NEAREST_SUBSAMP_STEP_MAX : CHROMA_NEAREST_NORMAL_STEP_MAX;
   1.571 +    if (uvxscale_min <= abs(source_dx_q16)
   1.572 +     && abs(source_dx_q16) <= uvxscale_max
   1.573 +     && uvyscale_min <= abs(source_dy_q16)
   1.574 +     && abs(source_dy_q16) <= uvyscale_max) {
   1.575 +      if (yuv_type != YV24)
   1.576 +        return supports_neon();
   1.577 +    }
   1.578 +  }
   1.579 +#  endif
   1.580 +  return false;
   1.581 +}
   1.582 +
   1.583 +
   1.584 +
   1.585 +void yuv_to_rgb565_row_c(uint16 *dst,
   1.586 +                         const uint8 *y,
   1.587 +                         const uint8 *u,
   1.588 +                         const uint8 *v,
   1.589 +                         int x_shift,
   1.590 +                         int pic_x,
   1.591 +                         int pic_width)
   1.592 +{
   1.593 +  int x;
   1.594 +  for (x = 0; x < pic_width; x++)
   1.595 +  {
   1.596 +    dst[x] = yu2rgb565(y[pic_x+x],
   1.597 +                       u[(pic_x+x)>>x_shift],
   1.598 +                       v[(pic_x+x)>>x_shift],
   1.599 +                       2); // Disable dithering for now.
   1.600 +  }
   1.601 +}
   1.602 +
   1.603 +NS_GFX_(void) ConvertYCbCrToRGB565(const uint8* y_buf,
   1.604 +                                   const uint8* u_buf,
   1.605 +                                   const uint8* v_buf,
   1.606 +                                   uint8* rgb_buf,
   1.607 +                                   int pic_x,
   1.608 +                                   int pic_y,
   1.609 +                                   int pic_width,
   1.610 +                                   int pic_height,
   1.611 +                                   int y_pitch,
   1.612 +                                   int uv_pitch,
   1.613 +                                   int rgb_pitch,
   1.614 +                                   YUVType yuv_type)
   1.615 +{
   1.616 +  int x_shift;
   1.617 +  int y_shift;
   1.618 +  x_shift = yuv_type != YV24;
   1.619 +  y_shift = yuv_type == YV12;
   1.620 +#  ifdef MOZILLA_MAY_SUPPORT_NEON
   1.621 +  if (yuv_type != YV24 && supports_neon())
   1.622 +  {
   1.623 +    for (int i = 0; i < pic_height; i++) {
   1.624 +      int yoffs;
   1.625 +      int uvoffs;
   1.626 +      yoffs = y_pitch * (pic_y+i) + pic_x;
   1.627 +      uvoffs = uv_pitch * ((pic_y+i)>>y_shift) + (pic_x>>x_shift);
   1.628 +      yuv42x_to_rgb565_row_neon((uint16*)(rgb_buf + rgb_pitch * i),
   1.629 +                                y_buf + yoffs,
   1.630 +                                u_buf + uvoffs,
   1.631 +                                v_buf + uvoffs,
   1.632 +                                pic_width,
   1.633 +                                pic_x&x_shift);
   1.634 +    }
   1.635 +  }
   1.636 +  else
   1.637 +#  endif
   1.638 +  {
   1.639 +    for (int i = 0; i < pic_height; i++) {
   1.640 +      int yoffs;
   1.641 +      int uvoffs;
   1.642 +      yoffs = y_pitch * (pic_y+i);
   1.643 +      uvoffs = uv_pitch * ((pic_y+i)>>y_shift);
   1.644 +      yuv_to_rgb565_row_c((uint16*)(rgb_buf + rgb_pitch * i),
   1.645 +                          y_buf + yoffs,
   1.646 +                          u_buf + uvoffs,
   1.647 +                          v_buf + uvoffs,
   1.648 +                          x_shift,
   1.649 +                          pic_x,
   1.650 +                          pic_width);
   1.651 +    }
   1.652 +  }
   1.653 +}
   1.654 +
   1.655 +NS_GFX_(bool) IsConvertYCbCrToRGB565Fast(int pic_x,
   1.656 +                                         int pic_y,
   1.657 +                                         int pic_width,
   1.658 +                                         int pic_height,
   1.659 +                                         YUVType yuv_type)
   1.660 +{
   1.661 +#  if defined(MOZILLA_MAY_SUPPORT_NEON)
   1.662 +  return (yuv_type != YV24 && supports_neon());
   1.663 +#  else
   1.664 +  return false;
   1.665 +#  endif
   1.666 +}
   1.667 +
   1.668 +} // namespace gfx
   1.669 +
   1.670 +} // namespace mozilla
   1.671 +
   1.672 +#endif // HAVE_YCBCR_TO_RGB565

mercurial