michael@0: changeset:   94061:73a9b24d863a
michael@0: tag:         bilin
michael@0: tag:         qbase
michael@0: tag:         qtip
michael@0: tag:         tip
michael@0: user:        Jeff Muizelaar <jmuizelaar@mozilla.com>
michael@0: date:        Tue May 15 18:26:16 2012 -0400
michael@0: summary:     Bug 754364. Add bilinear non-repeat and repeat fast paths. r=joe
michael@0: 
michael@0: diff --git a/gfx/cairo/libpixman/src/pixman-fast-path.c b/gfx/cairo/libpixman/src/pixman-fast-path.c
michael@0: --- a/gfx/cairo/libpixman/src/pixman-fast-path.c
michael@0: +++ b/gfx/cairo/libpixman/src/pixman-fast-path.c
michael@0: @@ -1186,16 +1186,228 @@ FAST_NEAREST (8888_565_none, 8888, 0565,
michael@0:  FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
michael@0:  FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
michael@0:  FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
michael@0:  FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
michael@0:  FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
michael@0:  FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
michael@0:  FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
michael@0:  
michael@0: +static force_inline void
michael@0: +scaled_bilinear_scanline_8888_565_OVER (uint16_t *       dst,
michael@0: +                                        const uint32_t * mask,
michael@0: +                                        const uint32_t * src_top,
michael@0: +                                        const uint32_t * src_bottom,
michael@0: +                                        int32_t          w,
michael@0: +                                        int              wt,
michael@0: +                                        int              wb,
michael@0: +                                        pixman_fixed_t   vx,
michael@0: +                                        pixman_fixed_t   unit_x,
michael@0: +                                        pixman_fixed_t   max_vx,
michael@0: +                                        pixman_bool_t    zero_src)
michael@0: +{
michael@0: +    while ((w -= 1) >= 0)
michael@0: +    {
michael@0: +	uint32_t tl = src_top [pixman_fixed_to_int (vx)];
michael@0: +	uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1];
michael@0: +	uint32_t bl = src_bottom [pixman_fixed_to_int (vx)];
michael@0: +	uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
michael@0: +	uint32_t src, result;
michael@0: +	uint16_t d;
michael@0: +	d = *dst;
michael@0: +	src = bilinear_interpolation (tl, tr,
michael@0: +				      bl, br,
michael@0: +				      interpolation_coord(vx),
michael@0: +				      wb >> (8 - INTERPOLATION_PRECISION_BITS));
michael@0: +	vx += unit_x;
michael@0: +	result = over (src, CONVERT_0565_TO_0888 (d));
michael@0: +	*dst++ = CONVERT_8888_TO_0565(result);
michael@0: +    }
michael@0: +}
michael@0: +
michael@0: +static force_inline void
michael@0: +scaled_bilinear_scanline_8888_8888_OVER (uint32_t *       dst,
michael@0: +                                         const uint32_t * mask,
michael@0: +                                         const uint32_t * src_top,
michael@0: +                                         const uint32_t * src_bottom,
michael@0: +                                         int32_t          w,
michael@0: +                                         int              wt,
michael@0: +                                         int              wb,
michael@0: +                                         pixman_fixed_t   vx,
michael@0: +                                         pixman_fixed_t   unit_x,
michael@0: +                                         pixman_fixed_t   max_vx,
michael@0: +                                         pixman_bool_t    zero_src)
michael@0: +{
michael@0: +    while ((w -= 1) >= 0)
michael@0: +    {
michael@0: +	uint32_t tl = src_top [pixman_fixed_to_int (vx)];
michael@0: +	uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1];
michael@0: +	uint32_t bl = src_bottom [pixman_fixed_to_int (vx)];
michael@0: +	uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
michael@0: +	uint32_t src;
michael@0: +	uint32_t d;
michael@0: +	uint32_t result;
michael@0: +	d = *dst;
michael@0: +	src = bilinear_interpolation (tl, tr,
michael@0: +				      bl, br,
michael@0: +				      interpolation_coord(vx),
michael@0: +				      wb >> (8 - INTERPOLATION_PRECISION_BITS));
michael@0: +	vx += unit_x;
michael@0: +	*dst++ = over (src, d);
michael@0: +    }
michael@0: +}
michael@0: +
michael@0: +#if 1
michael@0: +
michael@0: +static force_inline void
michael@0: +scaled_bilinear_scanline_565_565_SRC (uint16_t *       dst,
michael@0: +				      const uint32_t * mask,
michael@0: +				      const uint16_t * src_top,
michael@0: +				      const uint16_t * src_bottom,
michael@0: +				      int32_t          w,
michael@0: +				      int              wt,
michael@0: +				      int              wb,
michael@0: +				      pixman_fixed_t   vx,
michael@0: +				      pixman_fixed_t   unit_x,
michael@0: +				      pixman_fixed_t   max_vx,
michael@0: +				      pixman_bool_t    zero_src)
michael@0: +{
michael@0: +    while ((w -= 1) >= 0)
michael@0: +    {
michael@0: +	uint16_t tl = src_top [pixman_fixed_to_int (vx)];
michael@0: +	uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1];
michael@0: +	uint16_t bl = src_bottom [pixman_fixed_to_int (vx)];
michael@0: +	uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
michael@0: +	uint32_t d;
michael@0: +	d = bilinear_interpolation(CONVERT_0565_TO_8888(tl),
michael@0: +				   CONVERT_0565_TO_8888(tr),
michael@0: +				   CONVERT_0565_TO_8888(bl),
michael@0: +				   CONVERT_0565_TO_8888(br),
michael@0: +				   interpolation_coord(vx),
michael@0: +				   wb >> (8 - INTERPOLATION_PRECISION_BITS));
michael@0: +	vx += unit_x;
michael@0: +	*dst++ = CONVERT_8888_TO_0565(d);
michael@0: +    }
michael@0: +}
michael@0: +
michael@0: +#else
michael@0: +
michael@0: +#define SK_G16_MASK_IN_PLACE 0xfc0
michael@0: +
michael@0: +static inline uint32_t SkExpand_rgb_16(uint16_t c) {
michael@0: +
michael@0: +    return ((c & SK_G16_MASK_IN_PLACE) << 16) | (c & ~SK_G16_MASK_IN_PLACE);
michael@0: +}
michael@0: +
michael@0: +/** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit
michael@0: +    color value. The computation yields only 16 bits of valid data; the upper
michael@0: +    16 bits of the intermediate 32-bit result can contain garbage. This
michael@0: +    function returns uint16_t, so the conversion on return discards those
michael@0: +    upper bits and the caller does not need to mask them.
michael@0: +*/
michael@0: +static inline uint16_t SkCompact_rgb_16(uint32_t c) {
michael@0: +    return ((c >> 16) & SK_G16_MASK_IN_PLACE) | (c & ~SK_G16_MASK_IN_PLACE);
michael@0: +}
michael@0: +// Returns the weighted sum of the four expanded pixels, scaled by 32 (5 extra bits); the weights sum to 32.
michael@0: +static inline uint32_t Filter_565_Expanded(unsigned x, unsigned y,
michael@0: +                                           uint32_t a00, uint32_t a01,
michael@0: +                                           uint32_t a10, uint32_t a11) {
michael@0: +    a00 = SkExpand_rgb_16(a00);
michael@0: +    a01 = SkExpand_rgb_16(a01);
michael@0: +    a10 = SkExpand_rgb_16(a10);
michael@0: +    a11 = SkExpand_rgb_16(a11);
michael@0: +    
michael@0: +    int xy = x * y >> 3;
michael@0: +    return  a00 * (32 - 2*y - 2*x + xy) +
michael@0: +            a01 * (2*x - xy) +
michael@0: +            a10 * (2*y - xy) +
michael@0: +            a11 * xy;
michael@0: +}
michael@0: +
michael@0: +
michael@0: +
michael@0: +static force_inline void
michael@0: +scaled_bilinear_scanline_565_565_SRC (uint16_t *       dst,
michael@0: +				      const uint32_t * mask,
michael@0: +				      const uint16_t * src_top,
michael@0: +				      const uint16_t * src_bottom,
michael@0: +				      int32_t          w,
michael@0: +				      int              wt,
michael@0: +				      int              wb,
michael@0: +				      pixman_fixed_t   vx,
michael@0: +				      pixman_fixed_t   unit_x,
michael@0: +				      pixman_fixed_t   max_vx,
michael@0: +				      pixman_bool_t    zero_src)
michael@0: +{
michael@0: +    while ((w -= 1) >= 0)
michael@0: +    {
michael@0: +	uint16_t tl = src_top [pixman_fixed_to_int (vx)];
michael@0: +	uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1];
michael@0: +	uint16_t bl = src_bottom [pixman_fixed_to_int (vx)];
michael@0: +	uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
michael@0: +
michael@0: +        uint32_t tmp = Filter_565_Expanded((vx>>12)&0xf, wb>>4, tl, tr, bl, br);
michael@0: +        vx += unit_x;
michael@0: +        *dst++ = SkCompact_rgb_16((tmp) >> 5);
michael@0: +    }
michael@0: +}
michael@0: +
michael@0: +
michael@0: +#endif
michael@0: +FAST_BILINEAR_MAINLOOP_COMMON (565_565_cover_SRC,
michael@0: +			       scaled_bilinear_scanline_565_565_SRC,
michael@0: +			       uint16_t, uint32_t, uint16_t,
michael@0: +			       COVER, FLAG_NONE)
michael@0: +FAST_BILINEAR_MAINLOOP_COMMON (565_565_pad_SRC,
michael@0: +			       scaled_bilinear_scanline_565_565_SRC,
michael@0: +			       uint16_t, uint32_t, uint16_t,
michael@0: +			       PAD, FLAG_NONE)
michael@0: +FAST_BILINEAR_MAINLOOP_COMMON (565_565_none_SRC,
michael@0: +			       scaled_bilinear_scanline_565_565_SRC,
michael@0: +			       uint16_t, uint32_t, uint16_t,
michael@0: +			       NONE, FLAG_NONE)
michael@0: +FAST_BILINEAR_MAINLOOP_COMMON (565_565_normal_SRC,
michael@0: +			       scaled_bilinear_scanline_565_565_SRC,
michael@0: +			       uint16_t, uint32_t, uint16_t,
michael@0: +			       NORMAL, FLAG_NONE)
michael@0: +
michael@0: +FAST_BILINEAR_MAINLOOP_COMMON (8888_565_cover_OVER,
michael@0: +			       scaled_bilinear_scanline_8888_565_OVER,
michael@0: +			       uint32_t, uint32_t, uint16_t,
michael@0: +			       COVER, FLAG_NONE)
michael@0: +FAST_BILINEAR_MAINLOOP_COMMON (8888_565_pad_OVER,
michael@0: +			       scaled_bilinear_scanline_8888_565_OVER,
michael@0: +			       uint32_t, uint32_t, uint16_t,
michael@0: +			       PAD, FLAG_NONE)
michael@0: +FAST_BILINEAR_MAINLOOP_COMMON (8888_565_none_OVER,
michael@0: +			       scaled_bilinear_scanline_8888_565_OVER,
michael@0: +			       uint32_t, uint32_t, uint16_t,
michael@0: +			       NONE, FLAG_NONE)
michael@0: +FAST_BILINEAR_MAINLOOP_COMMON (8888_565_normal_OVER,
michael@0: +			       scaled_bilinear_scanline_8888_565_OVER,
michael@0: +			       uint32_t, uint32_t, uint16_t,
michael@0: +			       NORMAL, FLAG_NONE)
michael@0: +
michael@0: +FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_cover_OVER,
michael@0: +			       scaled_bilinear_scanline_8888_8888_OVER,
michael@0: +			       uint32_t, uint32_t, uint32_t,
michael@0: +			       COVER, FLAG_NONE)
michael@0: +FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_pad_OVER,
michael@0: +			       scaled_bilinear_scanline_8888_8888_OVER,
michael@0: +			       uint32_t, uint32_t, uint32_t,
michael@0: +			       PAD, FLAG_NONE)
michael@0: +FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_none_OVER,
michael@0: +			       scaled_bilinear_scanline_8888_8888_OVER,
michael@0: +			       uint32_t, uint32_t, uint32_t,
michael@0: +			       NONE, FLAG_NONE)
michael@0: +FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_normal_OVER,
michael@0: +			       scaled_bilinear_scanline_8888_8888_OVER,
michael@0: +			       uint32_t, uint32_t, uint32_t,
michael@0: +			       NORMAL, FLAG_NONE)
michael@0: +
michael@0:  #define REPEAT_MIN_WIDTH    32
michael@0:  
michael@0:  static void
michael@0:  fast_composite_tiled_repeat (pixman_implementation_t *imp,
michael@0:  			     pixman_composite_info_t *info)
michael@0:  {
michael@0:      PIXMAN_COMPOSITE_ARGS (info);
michael@0:      pixman_composite_func_t func;
michael@0: @@ -1960,16 +2172,20 @@ static const pixman_fast_path_t c_fast_p
michael@0:  	PIXMAN_any,
michael@0:  	(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE |
michael@0:  	 FAST_PATH_NORMAL_REPEAT),
michael@0:  	PIXMAN_any, 0,
michael@0:  	PIXMAN_any, FAST_PATH_STD_DEST_FLAGS,
michael@0:  	fast_composite_tiled_repeat
michael@0:      },
michael@0:  
michael@0: +    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
michael@0: +    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
michael@0: +    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
michael@0: +
michael@0:      {   PIXMAN_OP_NONE	},
michael@0:  };
michael@0:  
michael@0:  #ifdef WORDS_BIGENDIAN
michael@0:  #define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n)))
michael@0:  #else
michael@0:  #define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs))
michael@0:  #endif
michael@0: diff --git a/gfx/cairo/libpixman/src/pixman-inlines.h b/gfx/cairo/libpixman/src/pixman-inlines.h
michael@0: --- a/gfx/cairo/libpixman/src/pixman-inlines.h
michael@0: +++ b/gfx/cairo/libpixman/src/pixman-inlines.h
michael@0: @@ -80,16 +80,21 @@ repeat (pixman_repeat_t repeat, int *c, 
michael@0:      }
michael@0:      return TRUE;
michael@0:  }
michael@0:  
michael@0:  #ifdef MOZ_GFX_OPTIMIZE_MOBILE
michael@0:  #define LOW_QUALITY_INTERPOLATION
michael@0:  #endif
michael@0:  
michael@0: +#ifdef LOW_QUALITY_INTERPOLATION
michael@0: +#define INTERPOLATION_PRECISION_BITS 4
michael@0: +#else
michael@0: +#define INTERPOLATION_PRECISION_BITS 8
michael@0: +#endif
michael@0:  static force_inline int32_t
michael@0:  interpolation_coord(pixman_fixed_t t)
michael@0:  {
michael@0:  #ifdef LOW_QUALITY_INTERPOLATION
michael@0:      return (t >> 12) & 0xf;
michael@0:  #else
michael@0:      return (t >> 8) & 0xff;
michael@0:  #endif