michael@0: /* NEON optimized code (C) COPYRIGHT 2009 Motorola michael@0: * michael@0: * Use of this source code is governed by a BSD-style license that can be michael@0: * found in the LICENSE file. michael@0: */ michael@0: michael@0: #include "SkBitmapProcState.h" michael@0: #include "SkPerspIter.h" michael@0: #include "SkShader.h" michael@0: #include "SkUtils.h" michael@0: #include "SkUtilsArm.h" michael@0: #include "SkBitmapProcState_utils.h" michael@0: michael@0: /* returns 0...(n-1) given any x (positive or negative). michael@0: michael@0: As an example, if n (which is always positive) is 5... michael@0: michael@0: x: -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 michael@0: returns: 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 michael@0: */ michael@0: static inline int sk_int_mod(int x, int n) { michael@0: SkASSERT(n > 0); michael@0: if ((unsigned)x >= (unsigned)n) { michael@0: if (x < 0) { michael@0: x = n + ~(~x % n); michael@0: } else { michael@0: x = x % n; michael@0: } michael@0: } michael@0: return x; michael@0: } michael@0: michael@0: void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count); michael@0: void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count); michael@0: michael@0: // Compile neon code paths if needed michael@0: #if !SK_ARM_NEON_IS_NONE michael@0: michael@0: // These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp michael@0: extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[]; michael@0: extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[]; michael@0: michael@0: #endif // !SK_ARM_NEON_IS_NONE michael@0: michael@0: // Compile non-neon code path if needed michael@0: #if !SK_ARM_NEON_IS_ALWAYS michael@0: #define MAKENAME(suffix) ClampX_ClampY ## suffix michael@0: #define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max) michael@0: #define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max) michael@0: #define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF) michael@0: #define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF) michael@0: #define CHECK_FOR_DECAL michael@0: #include "SkBitmapProcState_matrix.h" michael@0: michael@0: #define MAKENAME(suffix) RepeatX_RepeatY ## suffix michael@0: #define TILEX_PROCF(fx, max) SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1)) michael@0: #define TILEY_PROCF(fy, max) SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1)) michael@0: #define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF) michael@0: #define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF) michael@0: #include "SkBitmapProcState_matrix.h" michael@0: #endif michael@0: michael@0: #define MAKENAME(suffix) GeneralXY ## suffix michael@0: #define PREAMBLE(state) SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \ michael@0: SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \ michael@0: SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \ michael@0: SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY michael@0: #define PREAMBLE_PARAM_X , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX michael@0: #define PREAMBLE_PARAM_Y , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY michael@0: #define PREAMBLE_ARG_X , tileProcX, tileLowBitsProcX michael@0: #define PREAMBLE_ARG_Y , tileProcY, tileLowBitsProcY michael@0: #define TILEX_PROCF(fx, max) SK_USHIFT16(tileProcX(fx) * ((max) + 1)) michael@0: #define TILEY_PROCF(fy, max) SK_USHIFT16(tileProcY(fy) * ((max) + 1)) michael@0: #define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1) michael@0: #define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1) michael@0: #include "SkBitmapProcState_matrix.h" michael@0: michael@0: static inline U16CPU fixed_clamp(SkFixed x) michael@0: { michael@0: if (x < 0) { michael@0: x = 0; michael@0: } michael@0: if (x >> 16) { michael@0: x = 0xFFFF; michael@0: } michael@0: return x; michael@0: } michael@0: michael@0: static inline U16CPU fixed_repeat(SkFixed x) michael@0: { michael@0: return x & 0xFFFF; michael@0: } michael@0: michael@0: // Visual Studio 2010 (MSC_VER=1600) optimizes bit-shift code incorrectly. michael@0: // See http://code.google.com/p/skia/issues/detail?id=472 michael@0: #if defined(_MSC_VER) && (_MSC_VER >= 1600) michael@0: #pragma optimize("", off) michael@0: #endif michael@0: michael@0: static inline U16CPU fixed_mirror(SkFixed x) michael@0: { michael@0: SkFixed s = x << 15 >> 31; michael@0: // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval michael@0: return (x ^ s) & 0xFFFF; michael@0: } michael@0: michael@0: #if defined(_MSC_VER) && (_MSC_VER >= 1600) michael@0: #pragma optimize("", on) michael@0: #endif michael@0: michael@0: static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m) michael@0: { michael@0: if (SkShader::kClamp_TileMode == m) michael@0: return fixed_clamp; michael@0: if (SkShader::kRepeat_TileMode == m) michael@0: return fixed_repeat; michael@0: SkASSERT(SkShader::kMirror_TileMode == m); michael@0: return fixed_mirror; michael@0: } michael@0: michael@0: static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) { michael@0: return (x >> 12) & 0xF; michael@0: } michael@0: michael@0: static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) { michael@0: return ((x * scale) >> 12) & 0xF; michael@0: } michael@0: michael@0: static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) { michael@0: if (SkShader::kClamp_TileMode == m) { michael@0: return fixed_clamp_lowbits; michael@0: } else { michael@0: SkASSERT(SkShader::kMirror_TileMode == m || michael@0: SkShader::kRepeat_TileMode == m); michael@0: // mirror and repeat have the same behavior for the low bits. michael@0: return fixed_repeat_or_mirrow_lowbits; michael@0: } michael@0: } michael@0: michael@0: static inline U16CPU int_clamp(int x, int n) { michael@0: if (x >= n) { michael@0: x = n - 1; michael@0: } michael@0: if (x < 0) { michael@0: x = 0; michael@0: } michael@0: return x; michael@0: } michael@0: michael@0: static inline U16CPU int_repeat(int x, int n) { michael@0: return sk_int_mod(x, n); michael@0: } michael@0: michael@0: static inline U16CPU int_mirror(int x, int n) { michael@0: x = sk_int_mod(x, 2 * n); michael@0: if (x >= n) { michael@0: x = n + ~(x - n); michael@0: } michael@0: return x; michael@0: } michael@0: michael@0: #if 0 michael@0: static void test_int_tileprocs() { michael@0: for (int i = -8; i <= 8; i++) { michael@0: SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3)); michael@0: } michael@0: } michael@0: #endif michael@0: michael@0: static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) { michael@0: if (SkShader::kClamp_TileMode == tm) michael@0: return int_clamp; michael@0: if (SkShader::kRepeat_TileMode == tm) michael@0: return int_repeat; michael@0: SkASSERT(SkShader::kMirror_TileMode == tm); michael@0: return int_mirror; michael@0: } michael@0: michael@0: ////////////////////////////////////////////////////////////////////////////// michael@0: michael@0: void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) michael@0: { michael@0: int i; michael@0: michael@0: for (i = (count >> 2); i > 0; --i) michael@0: { michael@0: *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16); michael@0: fx += dx+dx; michael@0: *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16); michael@0: fx += dx+dx; michael@0: } michael@0: count &= 3; michael@0: michael@0: uint16_t* xx = (uint16_t*)dst; michael@0: for (i = count; i > 0; --i) { michael@0: *xx++ = SkToU16(fx >> 16); fx += dx; michael@0: } michael@0: } michael@0: michael@0: void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) michael@0: { michael@0: michael@0: michael@0: if (count & 1) michael@0: { michael@0: SkASSERT((fx >> (16 + 14)) == 0); michael@0: *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); michael@0: fx += dx; michael@0: } michael@0: while ((count -= 2) >= 0) michael@0: { michael@0: SkASSERT((fx >> (16 + 14)) == 0); michael@0: *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); michael@0: fx += dx; michael@0: michael@0: *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); michael@0: fx += dx; michael@0: } michael@0: } michael@0: michael@0: /////////////////////////////////////////////////////////////////////////////// michael@0: // stores the same as SCALE, but is cheaper to compute. Also since there is no michael@0: // scale, we don't need/have a FILTER version michael@0: michael@0: static void fill_sequential(uint16_t xptr[], int start, int count) { michael@0: #if 1 michael@0: if (reinterpret_cast(xptr) & 0x2) { michael@0: *xptr++ = start++; michael@0: count -= 1; michael@0: } michael@0: if (count > 3) { michael@0: uint32_t* xxptr = reinterpret_cast(xptr); michael@0: uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1); michael@0: uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3); michael@0: start += count & ~3; michael@0: int qcount = count >> 2; michael@0: do { michael@0: *xxptr++ = pattern0; michael@0: pattern0 += 0x40004; michael@0: *xxptr++ = pattern1; michael@0: pattern1 += 0x40004; michael@0: } while (--qcount != 0); michael@0: xptr = reinterpret_cast(xxptr); michael@0: count &= 3; michael@0: } michael@0: while (--count >= 0) { michael@0: *xptr++ = start++; michael@0: } michael@0: #else michael@0: for (int i = 0; i < count; i++) { michael@0: *xptr++ = start++; michael@0: } michael@0: #endif michael@0: } michael@0: michael@0: static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy, michael@0: int x, int y) { michael@0: SkPoint pt; michael@0: s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf, michael@0: SkIntToScalar(y) + SK_ScalarHalf, &pt); michael@0: **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16, michael@0: s.fBitmap->height()); michael@0: *xy += 1; // bump the ptr michael@0: // return our starting X position michael@0: return SkScalarToFixed(pt.fX) >> 16; michael@0: } michael@0: michael@0: static void clampx_nofilter_trans(const SkBitmapProcState& s, michael@0: uint32_t xy[], int count, int x, int y) { michael@0: SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); michael@0: michael@0: int xpos = nofilter_trans_preamble(s, &xy, x, y); michael@0: const int width = s.fBitmap->width(); michael@0: if (1 == width) { michael@0: // all of the following X values must be 0 michael@0: memset(xy, 0, count * sizeof(uint16_t)); michael@0: return; michael@0: } michael@0: michael@0: uint16_t* xptr = reinterpret_cast(xy); michael@0: int n; michael@0: michael@0: // fill before 0 as needed michael@0: if (xpos < 0) { michael@0: n = -xpos; michael@0: if (n > count) { michael@0: n = count; michael@0: } michael@0: memset(xptr, 0, n * sizeof(uint16_t)); michael@0: count -= n; michael@0: if (0 == count) { michael@0: return; michael@0: } michael@0: xptr += n; michael@0: xpos = 0; michael@0: } michael@0: michael@0: // fill in 0..width-1 if needed michael@0: if (xpos < width) { michael@0: n = width - xpos; michael@0: if (n > count) { michael@0: n = count; michael@0: } michael@0: fill_sequential(xptr, xpos, n); michael@0: count -= n; michael@0: if (0 == count) { michael@0: return; michael@0: } michael@0: xptr += n; michael@0: } michael@0: michael@0: // fill the remaining with the max value michael@0: sk_memset16(xptr, width - 1, count); michael@0: } michael@0: michael@0: static void repeatx_nofilter_trans(const SkBitmapProcState& s, michael@0: uint32_t xy[], int count, int x, int y) { michael@0: SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); michael@0: michael@0: int xpos = nofilter_trans_preamble(s, &xy, x, y); michael@0: const int width = s.fBitmap->width(); michael@0: if (1 == width) { michael@0: // all of the following X values must be 0 michael@0: memset(xy, 0, count * sizeof(uint16_t)); michael@0: return; michael@0: } michael@0: michael@0: uint16_t* xptr = reinterpret_cast(xy); michael@0: int start = sk_int_mod(xpos, width); michael@0: int n = width - start; michael@0: if (n > count) { michael@0: n = count; michael@0: } michael@0: fill_sequential(xptr, start, n); michael@0: xptr += n; michael@0: count -= n; michael@0: michael@0: while (count >= width) { michael@0: fill_sequential(xptr, 0, width); michael@0: xptr += width; michael@0: count -= width; michael@0: } michael@0: michael@0: if (count > 0) { michael@0: fill_sequential(xptr, 0, count); michael@0: } michael@0: } michael@0: michael@0: static void fill_backwards(uint16_t xptr[], int pos, int count) { michael@0: for (int i = 0; i < count; i++) { michael@0: SkASSERT(pos >= 0); michael@0: xptr[i] = pos--; michael@0: } michael@0: } michael@0: michael@0: static void mirrorx_nofilter_trans(const SkBitmapProcState& s, michael@0: uint32_t xy[], int count, int x, int y) { michael@0: SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); michael@0: michael@0: int xpos = nofilter_trans_preamble(s, &xy, x, y); michael@0: const int width = s.fBitmap->width(); michael@0: if (1 == width) { michael@0: // all of the following X values must be 0 michael@0: memset(xy, 0, count * sizeof(uint16_t)); michael@0: return; michael@0: } michael@0: michael@0: uint16_t* xptr = reinterpret_cast(xy); michael@0: // need to know our start, and our initial phase (forward or backward) michael@0: bool forward; michael@0: int n; michael@0: int start = sk_int_mod(xpos, 2 * width); michael@0: if (start >= width) { michael@0: start = width + ~(start - width); michael@0: forward = false; michael@0: n = start + 1; // [start .. 0] michael@0: } else { michael@0: forward = true; michael@0: n = width - start; // [start .. width) michael@0: } michael@0: if (n > count) { michael@0: n = count; michael@0: } michael@0: if (forward) { michael@0: fill_sequential(xptr, start, n); michael@0: } else { michael@0: fill_backwards(xptr, start, n); michael@0: } michael@0: forward = !forward; michael@0: xptr += n; michael@0: count -= n; michael@0: michael@0: while (count >= width) { michael@0: if (forward) { michael@0: fill_sequential(xptr, 0, width); michael@0: } else { michael@0: fill_backwards(xptr, width - 1, width); michael@0: } michael@0: forward = !forward; michael@0: xptr += width; michael@0: count -= width; michael@0: } michael@0: michael@0: if (count > 0) { michael@0: if (forward) { michael@0: fill_sequential(xptr, 0, count); michael@0: } else { michael@0: fill_backwards(xptr, width - 1, count); michael@0: } michael@0: } michael@0: } michael@0: michael@0: /////////////////////////////////////////////////////////////////////////////// michael@0: michael@0: SkBitmapProcState::MatrixProc michael@0: SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) { michael@0: // test_int_tileprocs(); michael@0: // check for our special case when there is no scale/affine/perspective michael@0: if (trivial_matrix) { michael@0: SkASSERT(SkPaint::kNone_FilterLevel == fFilterLevel); michael@0: fIntTileProcY = choose_int_tile_proc(fTileModeY); michael@0: switch (fTileModeX) { michael@0: case SkShader::kClamp_TileMode: michael@0: return clampx_nofilter_trans; michael@0: case SkShader::kRepeat_TileMode: michael@0: return repeatx_nofilter_trans; michael@0: case SkShader::kMirror_TileMode: michael@0: return mirrorx_nofilter_trans; michael@0: } michael@0: } michael@0: michael@0: int index = 0; michael@0: if (fFilterLevel != SkPaint::kNone_FilterLevel) { michael@0: index = 1; michael@0: } michael@0: if (fInvType & SkMatrix::kPerspective_Mask) { michael@0: index += 4; michael@0: } else if (fInvType & SkMatrix::kAffine_Mask) { michael@0: index += 2; michael@0: } michael@0: michael@0: if (SkShader::kClamp_TileMode == fTileModeX && michael@0: SkShader::kClamp_TileMode == fTileModeY) michael@0: { michael@0: // clamp gets special version of filterOne michael@0: fFilterOneX = SK_Fixed1; michael@0: fFilterOneY = SK_Fixed1; michael@0: return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index]; michael@0: } michael@0: michael@0: // all remaining procs use this form for filterOne michael@0: fFilterOneX = SK_Fixed1 / fBitmap->width(); michael@0: fFilterOneY = SK_Fixed1 / fBitmap->height(); michael@0: michael@0: if (SkShader::kRepeat_TileMode == fTileModeX && michael@0: SkShader::kRepeat_TileMode == fTileModeY) michael@0: { michael@0: return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index]; michael@0: } michael@0: michael@0: fTileProcX = choose_tile_proc(fTileModeX); michael@0: fTileProcY = choose_tile_proc(fTileModeY); michael@0: fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX); michael@0: fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY); michael@0: return GeneralXY_Procs[index]; michael@0: }