michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0:  * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0:  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0: 
michael@0: #include <stdlib.h>
michael@0: #include <limits.h>
michael@0: #include "nsDebug.h"
michael@0: #include "ycbcr_to_rgb565.h"
michael@0: #include "nsAlgorithm.h"
michael@0: 
michael@0: 
michael@0: 
michael@0: #ifdef HAVE_YCBCR_TO_RGB565
michael@0: 
michael@0: namespace mozilla {
michael@0: 
michael@0: namespace gfx {
michael@0: 
/*Everything a bilinear row converter needs in order to produce one row.
  The values are bundled in a struct, rather than passed as separate
   arguments, so that the ones which stay fixed from row to row do not have to
   be pushed onto the stack again for every row.
  NOTE(review): ScaleYCbCr42xToRGB565_BilinearY_Row_NEON is implemented outside
   this file and receives a pointer to this struct, so it presumably depends on
   the exact member order; do not reorder members without checking it.*/
struct yuv2rgb565_row_scale_bilinear_ctx{
  /*Destination row of RGB565 pixels.*/
  uint16_t *rgb_row;
  /*Source rows in the Y', Cb, and Cr planes.*/
  const uint8_t *y_row;
  const uint8_t *u_row;
  const uint8_t *v_row;
  /*Vertical blend weight for luma, in 1/256ths.*/
  int y_yweight;
  /*Offset from y_row to the next luma row.*/
  int y_pitch;
  /*Number of destination pixels to produce.*/
  int width;
  /*16.16 fixed-point source position of the first destination pixel.*/
  int source_x0_q16;
  /*16.16 fixed-point source step per destination pixel.*/
  int source_dx_q16;
  /*Offset added to the luma position to get the chroma position.
    Not used for 4:4:4, except with chroma-nearest.*/
  int source_uv_xoffs_q16;
  /*Offset from u_row/v_row to the next chroma row.
    Not used for 4:4:4 or chroma-nearest.*/
  int uv_pitch;
  /*Vertical blend weight for chroma, in 1/256ths.
    Not used for 4:2:2, 4:4:4, or chroma-nearest.*/
  int uv_yweight;
};
michael@0: 
michael@0: 
michael@0: 
/*Everything a nearest-neighbor row converter needs in order to produce one
   row.
  The values are bundled in a struct, rather than passed as separate
   arguments, so that the ones which stay fixed from row to row do not have to
   be pushed onto the stack again for every row.*/
struct yuv2rgb565_row_scale_nearest_ctx{
  /*Destination row of RGB565 pixels.*/
  uint16_t *rgb_row;
  /*Source rows in the Y', Cb, and Cr planes.*/
  const uint8_t *y_row;
  const uint8_t *u_row;
  const uint8_t *v_row;
  /*Number of destination pixels to produce.*/
  int width;
  /*16.16 fixed-point source position of the first destination pixel.*/
  int source_x0_q16;
  /*16.16 fixed-point source step per destination pixel.*/
  int source_dx_q16;
  /*Offset added to the luma position to get the chroma position.
    Not used for 4:4:4.*/
  int source_uv_xoffs_q16;
};
michael@0: 
michael@0: 
michael@0: 
michael@0: typedef void (*yuv2rgb565_row_scale_bilinear_func)(
michael@0:  const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither);
michael@0: 
michael@0: typedef void (*yuv2rgb565_row_scale_nearest_func)(
michael@0:  const yuv2rgb565_row_scale_nearest_ctx *ctx, int dither);
michael@0: 
michael@0: 
michael@0: 
# if defined(MOZILLA_MAY_SUPPORT_NEON)

/*Row converters that are only declared here; their implementations live
   outside this file and are used when supports_neon() reports NEON.*/

/*Chroma-nearest, luma-bilinear row scaler for horizontally subsampled chroma;
   has the same signature as ScaleYCbCr42xToRGB565_BilinearY_Row_C.*/
extern "C" {
void ScaleYCbCr42xToRGB565_BilinearY_Row_NEON(
 const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither);
}

/*Unscaled row converter for horizontally subsampled chroma.
  ConvertYCbCrToRGB565 passes the number of pixels as n and the low bit of the
   starting luma column as oddflag.*/
void __attribute((noinline)) yuv42x_to_rgb565_row_neon(
 uint16 *dst, const uint8 *y, const uint8 *u, const uint8 *v,
 int n, int oddflag);

#endif // MOZILLA_MAY_SUPPORT_NEON
michael@0: 
michael@0: 
michael@0: 
/*Bilinearly interpolate one 8-bit sample from a 2x2 neighborhood.
  row:      Start of the upper source row.
  pitch:    Offset from the upper row to the lower row.
  source_x: Index of the left column of the neighborhood.
  xweight:  Weight of the right column, in 1/256ths.
  yweight:  Weight of the lower row, in 1/256ths.
  The arithmetic deliberately mirrors the asm version step for step, including
   the intermediate shifts that only cost accuracy, so both produce the same
   output.*/
static int bislerp(const uint8_t *row,
                   int pitch,
                   int source_x,
                   int xweight,
                   int yweight) {
  const uint8_t *upper = row+source_x;
  const uint8_t *lower = upper+pitch;
  const int upper_left = upper[0];
  const int upper_right = upper[1];
  const int lower_left = lower[0];
  const int lower_right = lower[1];
  /*Blend vertically within each column first, rounding back to 8 bits...*/
  const int left =
   ((upper_left<<8)+(lower_left-upper_left)*yweight+128)>>8;
  const int right =
   ((upper_right<<8)+(lower_right-upper_right)*yweight+128)>>8;
  /*...then blend the two column results horizontally.*/
  return ((left<<8)+(right-left)*xweight+128)>>8;
}
michael@0: 
/*Convert one Y'CbCr pixel to a packed RGB565 value.
  y, u, v: The Y', Cb, and Cr samples.
  dither:  Index (0...3) of the rounding bias to apply.
  The coefficients are kept identical to the ones used by the asm, even though
   32-bit registers would allow much more accurate constants.*/
static uint16_t yu2rgb565(int y, int u, int v, int dither) {
  /*Each row is {red, green, blue}: the constant offset of the Y'CbCr
     conversion merged with the rounding offset for one dither level.*/
  static const int DITHER_BIAS[4][3]={
    {-14240,    8704,    -17696},
    {-14240+128,8704+64, -17696+128},
    {-14240+256,8704+128,-17696+256},
    {-14240+384,8704+192,-17696+384}
  };
  const int *bias = DITHER_BIAS[dither];
  /*Red and blue keep 5 bits, green keeps 6, hence the different shifts.*/
  int red = (74*y+102*v+bias[0])>>9;
  int green = (74*y-25*u-52*v+bias[1])>>8;
  int blue = (74*y+129*u+bias[2])>>9;
  /*Saturate every channel to the range its bit-field can hold.*/
  if (red < 0) red = 0;
  else if (red > 31) red = 31;
  if (green < 0) green = 0;
  else if (green > 63) green = 63;
  if (blue < 0) blue = 0;
  else if (blue > 31) blue = 31;
  return (uint16_t)(red<<11 | green<<5 | blue);
}
michael@0: 
michael@0: static void ScaleYCbCr420ToRGB565_Bilinear_Row_C(
michael@0:  const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither){
michael@0:   int x;
michael@0:   int source_x_q16;
michael@0:   source_x_q16 = ctx->source_x0_q16;
michael@0:   for (x = 0; x < ctx->width; x++) {
michael@0:     int source_x;
michael@0:     int xweight;
michael@0:     int y;
michael@0:     int u;
michael@0:     int v;
michael@0:     xweight = ((source_x_q16&0xFFFF)+128)>>8;
michael@0:     source_x = source_x_q16>>16;
michael@0:     y = bislerp(ctx->y_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
michael@0:     xweight = (((source_x_q16+ctx->source_uv_xoffs_q16)&0x1FFFF)+256)>>9;
michael@0:     source_x = (source_x_q16+ctx->source_uv_xoffs_q16)>>17;
michael@0:     source_x_q16 += ctx->source_dx_q16;
michael@0:     u = bislerp(ctx->u_row, ctx->uv_pitch, source_x, xweight, ctx->uv_yweight);
michael@0:     v = bislerp(ctx->v_row, ctx->uv_pitch, source_x, xweight, ctx->uv_yweight);
michael@0:     ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
michael@0:     dither ^= 3;
michael@0:   }
michael@0: }
michael@0: 
michael@0: static void ScaleYCbCr422ToRGB565_Bilinear_Row_C(
michael@0:  const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither){
michael@0:   int x;
michael@0:   int source_x_q16;
michael@0:   source_x_q16 = ctx->source_x0_q16;
michael@0:   for (x = 0; x < ctx->width; x++) {
michael@0:     int source_x;
michael@0:     int xweight;
michael@0:     int y;
michael@0:     int u;
michael@0:     int v;
michael@0:     xweight = ((source_x_q16&0xFFFF)+128)>>8;
michael@0:     source_x = source_x_q16>>16;
michael@0:     y = bislerp(ctx->y_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
michael@0:     xweight = (((source_x_q16+ctx->source_uv_xoffs_q16)&0x1FFFF)+256)>>9;
michael@0:     source_x = (source_x_q16+ctx->source_uv_xoffs_q16)>>17;
michael@0:     source_x_q16 += ctx->source_dx_q16;
michael@0:     u = bislerp(ctx->u_row, ctx->uv_pitch, source_x, xweight, ctx->y_yweight);
michael@0:     v = bislerp(ctx->v_row, ctx->uv_pitch, source_x, xweight, ctx->y_yweight);
michael@0:     ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
michael@0:     dither ^= 3;
michael@0:   }
michael@0: }
michael@0: 
michael@0: static void ScaleYCbCr444ToRGB565_Bilinear_Row_C(
michael@0:  const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither){
michael@0:   int x;
michael@0:   int source_x_q16;
michael@0:   source_x_q16 = ctx->source_x0_q16;
michael@0:   for (x = 0; x < ctx->width; x++) {
michael@0:     int source_x;
michael@0:     int xweight;
michael@0:     int y;
michael@0:     int u;
michael@0:     int v;
michael@0:     xweight = ((source_x_q16&0xFFFF)+128)>>8;
michael@0:     source_x = source_x_q16>>16;
michael@0:     source_x_q16 += ctx->source_dx_q16;
michael@0:     y = bislerp(ctx->y_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
michael@0:     u = bislerp(ctx->u_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
michael@0:     v = bislerp(ctx->v_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
michael@0:     ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
michael@0:     dither ^= 3;
michael@0:   }
michael@0: }
michael@0: 
michael@0: static void ScaleYCbCr42xToRGB565_BilinearY_Row_C(
michael@0:  const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither){
michael@0:   int x;
michael@0:   int source_x_q16;
michael@0:   source_x_q16 = ctx->source_x0_q16;
michael@0:   for (x = 0; x < ctx->width; x++) {
michael@0:     int source_x;
michael@0:     int xweight;
michael@0:     int y;
michael@0:     int u;
michael@0:     int v;
michael@0:     xweight = ((source_x_q16&0xFFFF)+128)>>8;
michael@0:     source_x = source_x_q16>>16;
michael@0:     y = bislerp(ctx->y_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
michael@0:     source_x = (source_x_q16+ctx->source_uv_xoffs_q16)>>17;
michael@0:     source_x_q16 += ctx->source_dx_q16;
michael@0:     u = ctx->u_row[source_x];
michael@0:     v = ctx->v_row[source_x];
michael@0:     ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
michael@0:     dither ^= 3;
michael@0:   }
michael@0: }
michael@0: 
michael@0: static void ScaleYCbCr444ToRGB565_BilinearY_Row_C(
michael@0:  const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither){
michael@0:   int x;
michael@0:   int source_x_q16;
michael@0:   source_x_q16 = ctx->source_x0_q16;
michael@0:   for (x = 0; x < ctx->width; x++) {
michael@0:     int source_x;
michael@0:     int xweight;
michael@0:     int y;
michael@0:     int u;
michael@0:     int v;
michael@0:     xweight = ((source_x_q16&0xFFFF)+128)>>8;
michael@0:     source_x = source_x_q16>>16;
michael@0:     y = bislerp(ctx->y_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight);
michael@0:     source_x = (source_x_q16+ctx->source_uv_xoffs_q16)>>16;
michael@0:     source_x_q16 += ctx->source_dx_q16;
michael@0:     u = ctx->u_row[source_x];
michael@0:     v = ctx->v_row[source_x];
michael@0:     ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
michael@0:     dither ^= 3;
michael@0:   }
michael@0: }
michael@0: 
michael@0: static void ScaleYCbCr42xToRGB565_Nearest_Row_C(
michael@0:  const yuv2rgb565_row_scale_nearest_ctx *ctx, int dither){
michael@0:   int y;
michael@0:   int u;
michael@0:   int v;
michael@0:   int x;
michael@0:   int source_x_q16;
michael@0:   int source_x;
michael@0:   source_x_q16 = ctx->source_x0_q16;
michael@0:   for (x = 0; x < ctx->width; x++) {
michael@0:     source_x = source_x_q16>>16;
michael@0:     y = ctx->y_row[source_x];
michael@0:     source_x = (source_x_q16+ctx->source_uv_xoffs_q16)>>17;
michael@0:     source_x_q16 += ctx->source_dx_q16;
michael@0:     u = ctx->u_row[source_x];
michael@0:     v = ctx->v_row[source_x];
michael@0:     ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
michael@0:     dither ^= 3;
michael@0:   }
michael@0: }
michael@0: 
michael@0: static void ScaleYCbCr444ToRGB565_Nearest_Row_C(
michael@0:  const yuv2rgb565_row_scale_nearest_ctx *ctx, int dither){
michael@0:   int y;
michael@0:   int u;
michael@0:   int v;
michael@0:   int x;
michael@0:   int source_x_q16;
michael@0:   int source_x;
michael@0:   source_x_q16 = ctx->source_x0_q16;
michael@0:   for (x = 0; x < ctx->width; x++) {
michael@0:     source_x = source_x_q16>>16;
michael@0:     source_x_q16 += ctx->source_dx_q16;
michael@0:     y = ctx->y_row[source_x];
michael@0:     u = ctx->u_row[source_x];
michael@0:     v = ctx->v_row[source_x];
michael@0:     ctx->rgb_row[x] = yu2rgb565(y, u, v, dither);
michael@0:     dither ^= 3;
michael@0:   }
michael@0: }
michael@0: 
michael@0: NS_GFX_(void) ScaleYCbCrToRGB565(const uint8_t *y_buf,
michael@0:                                  const uint8_t *u_buf,
michael@0:                                  const uint8_t *v_buf,
michael@0:                                  uint8_t *rgb_buf,
michael@0:                                  int source_x0,
michael@0:                                  int source_y0,
michael@0:                                  int source_width,
michael@0:                                  int source_height,
michael@0:                                  int width,
michael@0:                                  int height,
michael@0:                                  int y_pitch,
michael@0:                                  int uv_pitch,
michael@0:                                  int rgb_pitch,
michael@0:                                  YUVType yuv_type,
michael@0:                                  ScaleFilter filter) {
michael@0:   int source_x0_q16;
michael@0:   int source_y0_q16;
michael@0:   int source_dx_q16;
michael@0:   int source_dy_q16;
michael@0:   int source_uv_xoffs_q16;
michael@0:   int source_uv_yoffs_q16;
michael@0:   int x_shift;
michael@0:   int y_shift;
michael@0:   int ymin;
michael@0:   int ymax;
michael@0:   int uvmin;
michael@0:   int uvmax;
michael@0:   int dither;
michael@0:   /*We don't support negative destination rectangles (just flip the source
michael@0:      instead), and for empty ones there's nothing to do.*/
michael@0:   if (width <= 0 || height <= 0)
michael@0:     return;
michael@0:   /*These bounds are required to avoid 16.16 fixed-point overflow.*/
michael@0:   NS_ASSERTION(source_x0 > (INT_MIN>>16) && source_x0 < (INT_MAX>>16),
michael@0:     "ScaleYCbCrToRGB565 source X offset out of bounds.");
michael@0:   NS_ASSERTION(source_x0+source_width > (INT_MIN>>16)
michael@0:             && source_x0+source_width < (INT_MAX>>16),
michael@0:     "ScaleYCbCrToRGB565 source width out of bounds.");
michael@0:   NS_ASSERTION(source_y0 > (INT_MIN>>16) && source_y0 < (INT_MAX>>16),
michael@0:     "ScaleYCbCrToRGB565 source Y offset out of bounds.");
michael@0:   NS_ASSERTION(source_y0+source_height > (INT_MIN>>16)
michael@0:             && source_y0+source_height < (INT_MAX>>16),
michael@0:     "ScaleYCbCrToRGB565 source height out of bounds.");
michael@0:   /*We require the same stride for Y' and Cb and Cr for 4:4:4 content.*/
michael@0:   NS_ASSERTION(yuv_type != YV24 || y_pitch == uv_pitch,
michael@0:     "ScaleYCbCrToRGB565 luma stride differs from chroma for 4:4:4 content.");
michael@0:   /*We assume we can read outside the bounds of the input, because it makes
michael@0:      the code much simpler (and in practice is true: both Theora and VP8 return
michael@0:      padded reference frames).
michael@0:     In practice, we do not even _have_ the actual bounds of the source, as
michael@0:      we are passed a crop rectangle from it, and not the dimensions of the full
michael@0:      image.
michael@0:     This assertion will not guarantee our out-of-bounds reads are safe, but it
michael@0:      should at least catch the simple case of passing in an unpadded buffer.*/
michael@0:   NS_ASSERTION(abs(y_pitch) >= abs(source_width)+16,
michael@0:     "ScaleYCbCrToRGB565 source image unpadded?");
michael@0:   /*The NEON code requires the pointers to be aligned to a 16-byte boundary at
michael@0:      the start of each row.
michael@0:     This should be true for all of our sources.
michael@0:     We could try to fix this up if it's not true by adjusting source_x0, but
michael@0:      that would require the mis-alignment to be the same for the U and V
michael@0:      planes.*/
michael@0:   NS_ASSERTION((y_pitch&15) == 0 && (uv_pitch&15) == 0 &&
michael@0:    ((y_buf-(uint8_t *)nullptr)&15) == 0 &&
michael@0:    ((u_buf-(uint8_t *)nullptr)&15) == 0 &&
michael@0:    ((v_buf-(uint8_t *)nullptr)&15) == 0,
michael@0:    "ScaleYCbCrToRGB565 source image unaligned");
michael@0:   /*We take an area-based approach to pixel coverage to avoid shifting by small
michael@0:      amounts (or not so small, when up-scaling or down-scaling by a large
michael@0:      factor).
michael@0: 
michael@0:     An illustrative example: scaling 4:2:0 up by 2, using JPEG chroma cositing^.
michael@0: 
michael@0:     + = RGB destination locations
michael@0:     * = Y' source locations
michael@0:     - = Cb, Cr source locations
michael@0: 
michael@0:     +   +   +   +  +   +   +   +
michael@0:       *       *      *       *
michael@0:     +   +   +   +  +   +   +   +
michael@0:           -              -
michael@0:     +   +   +   +  +   +   +   +
michael@0:       *       *      *       *
michael@0:     +   +   +   +  +   +   +   +
michael@0: 
michael@0:     +   +   +   +  +   +   +   +
michael@0:       *       *      *       *
michael@0:     +   +   +   +  +   +   +   +
michael@0:           -              -
michael@0:     +   +   +   +  +   +   +   +
michael@0:       *       *      *       *
michael@0:     +   +   +   +  +   +   +   +
michael@0: 
michael@0:     So, the coordinates of the upper-left + (first destination site) should
michael@0:      be (-0.25,-0.25) in the source Y' coordinate system.
michael@0:     Similarly, the coordinates should be (-0.375,-0.375) in the source Cb, Cr
michael@0:      coordinate system.
michael@0:     Note that the origin and scale of these two coordinate systems is not the
michael@0:      same!
michael@0: 
michael@0:     ^JPEG cositing is required for Theora; VP8 doesn't specify cositing rules,
michael@0:      but nearly all software converters in existence (at least those that are
michael@0:      open source, and many that are not) use JPEG cositing instead of MPEG.*/
michael@0:   source_dx_q16 = (source_width<<16) / width;
michael@0:   source_x0_q16 = (source_x0<<16)+(source_dx_q16>>1)-0x8000;
michael@0:   source_dy_q16 = (source_height<<16) / height;
michael@0:   source_y0_q16 = (source_y0<<16)+(source_dy_q16>>1)-0x8000;
michael@0:   x_shift = (yuv_type != YV24);
michael@0:   y_shift = (yuv_type == YV12);
michael@0:   /*These two variables hold the difference between the origins of the Y' and
michael@0:      the Cb, Cr coordinate systems, using the scale of the Y' coordinate
michael@0:      system.*/
michael@0:   source_uv_xoffs_q16 = -(x_shift<<15);
michael@0:   source_uv_yoffs_q16 = -(y_shift<<15);
michael@0:   /*Compute the range of source rows we'll actually use.
michael@0:     This doesn't guarantee we won't read outside this range.*/
michael@0:   ymin = source_height >= 0 ? source_y0 : source_y0+source_height-1;
michael@0:   ymax = source_height >= 0 ? source_y0+source_height-1 : source_y0;
michael@0:   uvmin = ymin>>y_shift;
michael@0:   uvmax = ((ymax+1+y_shift)>>y_shift)-1;
michael@0:   /*Pick a dithering pattern.
michael@0:     The "&3" at the end is just in case RAND_MAX is lying.*/
michael@0:   dither = (rand()/(RAND_MAX>>2))&3;
michael@0:   /*Nearest-neighbor scaling.*/
michael@0:   if (filter == FILTER_NONE) {
michael@0:     yuv2rgb565_row_scale_nearest_ctx ctx;
michael@0:     yuv2rgb565_row_scale_nearest_func scale_row;
michael@0:     int y;
michael@0:     /*Add rounding offsets once, in advance.*/
michael@0:     source_x0_q16 += 0x8000;
michael@0:     source_y0_q16 += 0x8000;
michael@0:     source_uv_xoffs_q16 += (x_shift<<15);
michael@0:     source_uv_yoffs_q16 += (y_shift<<15);
michael@0:     if (yuv_type == YV12)
michael@0:       scale_row = ScaleYCbCr42xToRGB565_Nearest_Row_C;
michael@0:     else
michael@0:       scale_row = ScaleYCbCr444ToRGB565_Nearest_Row_C;
michael@0:     ctx.width = width;
michael@0:     ctx.source_x0_q16 = source_x0_q16;
michael@0:     ctx.source_dx_q16 = source_dx_q16;
michael@0:     ctx.source_uv_xoffs_q16 = source_uv_xoffs_q16;
michael@0:     for (y=0; y<height; y++) {
michael@0:       int source_y;
michael@0:       ctx.rgb_row = (uint16_t *)(rgb_buf + y*rgb_pitch);
michael@0:       source_y = source_y0_q16>>16;
michael@0:       source_y = clamped(source_y, ymin, ymax);
michael@0:       ctx.y_row = y_buf + source_y*y_pitch;
michael@0:       source_y = (source_y0_q16+source_uv_yoffs_q16)>>(16+y_shift);
michael@0:       source_y = clamped(source_y, uvmin, uvmax);
michael@0:       source_y0_q16 += source_dy_q16;
michael@0:       ctx.u_row = u_buf + source_y*uv_pitch;
michael@0:       ctx.v_row = v_buf + source_y*uv_pitch;
michael@0:       (*scale_row)(&ctx, dither);
michael@0:       dither ^= 2;
michael@0:     }
michael@0:   }
michael@0:   /*Bilinear scaling.*/
michael@0:   else {
michael@0:     yuv2rgb565_row_scale_bilinear_ctx ctx;
michael@0:     yuv2rgb565_row_scale_bilinear_func scale_row;
michael@0:     int uvxscale_min;
michael@0:     int uvxscale_max;
michael@0:     int uvyscale_min;
michael@0:     int uvyscale_max;
michael@0:     int y;
michael@0:     /*Check how close the chroma scaling is to unity.
michael@0:       If it's close enough, we can get away with nearest-neighbor chroma
michael@0:        sub-sampling, and only doing bilinear on luma.
michael@0:       If a given axis is subsampled, we use bounds on the luma step of
michael@0:        [0.67...2], which is equivalent to scaling chroma by [1...3].
michael@0:       If it's not subsampled, we use bounds of [0.5...1.33], which is
michael@0:        equivalent to scaling chroma by [0.75...2].
michael@0:       The lower bound is chosen as a trade-off between speed and how terrible
michael@0:        nearest neighbor looks when upscaling.*/
michael@0: # define CHROMA_NEAREST_SUBSAMP_STEP_MIN  0xAAAA
michael@0: # define CHROMA_NEAREST_NORMAL_STEP_MIN   0x8000
michael@0: # define CHROMA_NEAREST_SUBSAMP_STEP_MAX 0x20000
michael@0: # define CHROMA_NEAREST_NORMAL_STEP_MAX  0x15555
michael@0:     uvxscale_min = yuv_type != YV24 ?
michael@0:      CHROMA_NEAREST_SUBSAMP_STEP_MIN : CHROMA_NEAREST_NORMAL_STEP_MIN;
michael@0:     uvxscale_max = yuv_type != YV24 ?
michael@0:      CHROMA_NEAREST_SUBSAMP_STEP_MAX : CHROMA_NEAREST_NORMAL_STEP_MAX;
michael@0:     uvyscale_min = yuv_type == YV12 ?
michael@0:      CHROMA_NEAREST_SUBSAMP_STEP_MIN : CHROMA_NEAREST_NORMAL_STEP_MIN;
michael@0:     uvyscale_max = yuv_type == YV12 ?
michael@0:      CHROMA_NEAREST_SUBSAMP_STEP_MAX : CHROMA_NEAREST_NORMAL_STEP_MAX;
michael@0:     if (uvxscale_min <= abs(source_dx_q16)
michael@0:      && abs(source_dx_q16) <= uvxscale_max
michael@0:      && uvyscale_min <= abs(source_dy_q16)
michael@0:      && abs(source_dy_q16) <= uvyscale_max) {
michael@0:       /*Add the rounding offsets now.*/
michael@0:       source_uv_xoffs_q16 += 1<<(15+x_shift);
michael@0:       source_uv_yoffs_q16 += 1<<(15+y_shift);
michael@0:       if (yuv_type != YV24) {
michael@0:         scale_row =
michael@0: #  if defined(MOZILLA_MAY_SUPPORT_NEON)
michael@0:          supports_neon() ? ScaleYCbCr42xToRGB565_BilinearY_Row_NEON :
michael@0: #  endif
michael@0:          ScaleYCbCr42xToRGB565_BilinearY_Row_C;
michael@0:       }
michael@0:       else
michael@0:         scale_row = ScaleYCbCr444ToRGB565_BilinearY_Row_C;
michael@0:     }
michael@0:     else {
michael@0:       if (yuv_type == YV12)
michael@0:         scale_row = ScaleYCbCr420ToRGB565_Bilinear_Row_C;
michael@0:       else if (yuv_type == YV16)
michael@0:         scale_row = ScaleYCbCr422ToRGB565_Bilinear_Row_C;
michael@0:       else
michael@0:         scale_row = ScaleYCbCr444ToRGB565_Bilinear_Row_C;
michael@0:     }
michael@0:     ctx.width = width;
michael@0:     ctx.y_pitch = y_pitch;
michael@0:     ctx.source_x0_q16 = source_x0_q16;
michael@0:     ctx.source_dx_q16 = source_dx_q16;
michael@0:     ctx.source_uv_xoffs_q16 = source_uv_xoffs_q16;
michael@0:     ctx.uv_pitch = uv_pitch;
michael@0:     for (y=0; y<height; y++) {
michael@0:       int source_y;
michael@0:       int yweight;
michael@0:       int uvweight;
michael@0:       ctx.rgb_row = (uint16_t *)(rgb_buf + y*rgb_pitch);
michael@0:       source_y = (source_y0_q16+128)>>16;
michael@0:       yweight = ((source_y0_q16+128)>>8)&0xFF;
michael@0:       if (source_y < ymin) {
michael@0:         source_y = ymin;
michael@0:         yweight = 0;
michael@0:       }
michael@0:       if (source_y > ymax) {
michael@0:         source_y = ymax;
michael@0:         yweight = 0;
michael@0:       }
michael@0:       ctx.y_row = y_buf + source_y*y_pitch;
michael@0:       source_y = source_y0_q16+source_uv_yoffs_q16+(128<<y_shift);
michael@0:       source_y0_q16 += source_dy_q16;
michael@0:       uvweight = source_y>>(8+y_shift)&0xFF;
michael@0:       source_y >>= 16+y_shift;
michael@0:       if (source_y < uvmin) {
michael@0:         source_y = uvmin;
michael@0:         uvweight = 0;
michael@0:       }
michael@0:       if (source_y > uvmax) {
michael@0:         source_y = uvmax;
michael@0:         uvweight = 0;
michael@0:       }
michael@0:       ctx.u_row = u_buf + source_y*uv_pitch;
michael@0:       ctx.v_row = v_buf + source_y*uv_pitch;
michael@0:       ctx.y_yweight = yweight;
michael@0:       ctx.uv_yweight = uvweight;
michael@0:       (*scale_row)(&ctx, dither);
michael@0:       dither ^= 2;
michael@0:     }
michael@0:   }
michael@0: }
michael@0: 
michael@0: NS_GFX_(bool) IsScaleYCbCrToRGB565Fast(int source_x0,
michael@0:                                        int source_y0,
michael@0:                                        int source_width,
michael@0:                                        int source_height,
michael@0:                                        int width,
michael@0:                                        int height,
michael@0:                                        YUVType yuv_type,
michael@0:                                        ScaleFilter filter)
michael@0: {
michael@0:   // Very fast.
michael@0:   if (width <= 0 || height <= 0)
michael@0:     return true;
michael@0: #  if defined(MOZILLA_MAY_SUPPORT_NEON)
michael@0:   if (filter != FILTER_NONE) {
michael@0:     int source_dx_q16;
michael@0:     int source_dy_q16;
michael@0:     int uvxscale_min;
michael@0:     int uvxscale_max;
michael@0:     int uvyscale_min;
michael@0:     int uvyscale_max;
michael@0:     source_dx_q16 = (source_width<<16) / width;
michael@0:     source_dy_q16 = (source_height<<16) / height;
michael@0:     uvxscale_min = yuv_type != YV24 ?
michael@0:      CHROMA_NEAREST_SUBSAMP_STEP_MIN : CHROMA_NEAREST_NORMAL_STEP_MIN;
michael@0:     uvxscale_max = yuv_type != YV24 ?
michael@0:      CHROMA_NEAREST_SUBSAMP_STEP_MAX : CHROMA_NEAREST_NORMAL_STEP_MAX;
michael@0:     uvyscale_min = yuv_type == YV12 ?
michael@0:      CHROMA_NEAREST_SUBSAMP_STEP_MIN : CHROMA_NEAREST_NORMAL_STEP_MIN;
michael@0:     uvyscale_max = yuv_type == YV12 ?
michael@0:      CHROMA_NEAREST_SUBSAMP_STEP_MAX : CHROMA_NEAREST_NORMAL_STEP_MAX;
michael@0:     if (uvxscale_min <= abs(source_dx_q16)
michael@0:      && abs(source_dx_q16) <= uvxscale_max
michael@0:      && uvyscale_min <= abs(source_dy_q16)
michael@0:      && abs(source_dy_q16) <= uvyscale_max) {
michael@0:       if (yuv_type != YV24)
michael@0:         return supports_neon();
michael@0:     }
michael@0:   }
michael@0: #  endif
michael@0:   return false;
michael@0: }
michael@0: 
michael@0: 
michael@0: 
michael@0: void yuv_to_rgb565_row_c(uint16 *dst,
michael@0:                          const uint8 *y,
michael@0:                          const uint8 *u,
michael@0:                          const uint8 *v,
michael@0:                          int x_shift,
michael@0:                          int pic_x,
michael@0:                          int pic_width)
michael@0: {
michael@0:   int x;
michael@0:   for (x = 0; x < pic_width; x++)
michael@0:   {
michael@0:     dst[x] = yu2rgb565(y[pic_x+x],
michael@0:                        u[(pic_x+x)>>x_shift],
michael@0:                        v[(pic_x+x)>>x_shift],
michael@0:                        2); // Disable dithering for now.
michael@0:   }
michael@0: }
michael@0: 
michael@0: NS_GFX_(void) ConvertYCbCrToRGB565(const uint8* y_buf,
michael@0:                                    const uint8* u_buf,
michael@0:                                    const uint8* v_buf,
michael@0:                                    uint8* rgb_buf,
michael@0:                                    int pic_x,
michael@0:                                    int pic_y,
michael@0:                                    int pic_width,
michael@0:                                    int pic_height,
michael@0:                                    int y_pitch,
michael@0:                                    int uv_pitch,
michael@0:                                    int rgb_pitch,
michael@0:                                    YUVType yuv_type)
michael@0: {
michael@0:   int x_shift;
michael@0:   int y_shift;
michael@0:   x_shift = yuv_type != YV24;
michael@0:   y_shift = yuv_type == YV12;
michael@0: #  ifdef MOZILLA_MAY_SUPPORT_NEON
michael@0:   if (yuv_type != YV24 && supports_neon())
michael@0:   {
michael@0:     for (int i = 0; i < pic_height; i++) {
michael@0:       int yoffs;
michael@0:       int uvoffs;
michael@0:       yoffs = y_pitch * (pic_y+i) + pic_x;
michael@0:       uvoffs = uv_pitch * ((pic_y+i)>>y_shift) + (pic_x>>x_shift);
michael@0:       yuv42x_to_rgb565_row_neon((uint16*)(rgb_buf + rgb_pitch * i),
michael@0:                                 y_buf + yoffs,
michael@0:                                 u_buf + uvoffs,
michael@0:                                 v_buf + uvoffs,
michael@0:                                 pic_width,
michael@0:                                 pic_x&x_shift);
michael@0:     }
michael@0:   }
michael@0:   else
michael@0: #  endif
michael@0:   {
michael@0:     for (int i = 0; i < pic_height; i++) {
michael@0:       int yoffs;
michael@0:       int uvoffs;
michael@0:       yoffs = y_pitch * (pic_y+i);
michael@0:       uvoffs = uv_pitch * ((pic_y+i)>>y_shift);
michael@0:       yuv_to_rgb565_row_c((uint16*)(rgb_buf + rgb_pitch * i),
michael@0:                           y_buf + yoffs,
michael@0:                           u_buf + uvoffs,
michael@0:                           v_buf + uvoffs,
michael@0:                           x_shift,
michael@0:                           pic_x,
michael@0:                           pic_width);
michael@0:     }
michael@0:   }
michael@0: }
michael@0: 
michael@0: NS_GFX_(bool) IsConvertYCbCrToRGB565Fast(int pic_x,
michael@0:                                          int pic_y,
michael@0:                                          int pic_width,
michael@0:                                          int pic_height,
michael@0:                                          YUVType yuv_type)
michael@0: {
michael@0: #  if defined(MOZILLA_MAY_SUPPORT_NEON)
michael@0:   return (yuv_type != YV24 && supports_neon());
michael@0: #  else
michael@0:   return false;
michael@0: #  endif
michael@0: }
michael@0: 
michael@0: } // namespace gfx
michael@0: 
michael@0: } // namespace mozilla
michael@0: 
michael@0: #endif // HAVE_YCBCR_TO_RGB565