1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/gfx/skia/trunk/src/core/SkBitmapProcState_matrixProcs.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,466 @@ 1.4 +/* NEON optimized code (C) COPYRIGHT 2009 Motorola 1.5 + * 1.6 + * Use of this source code is governed by a BSD-style license that can be 1.7 + * found in the LICENSE file. 1.8 + */ 1.9 + 1.10 +#include "SkBitmapProcState.h" 1.11 +#include "SkPerspIter.h" 1.12 +#include "SkShader.h" 1.13 +#include "SkUtils.h" 1.14 +#include "SkUtilsArm.h" 1.15 +#include "SkBitmapProcState_utils.h" 1.16 + 1.17 +/* returns 0...(n-1) given any x (positive or negative). 1.18 + 1.19 + As an example, if n (which is always positive) is 5... 1.20 + 1.21 + x: -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 1.22 + returns: 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 1.23 + */ 1.24 +static inline int sk_int_mod(int x, int n) { 1.25 + SkASSERT(n > 0); 1.26 + if ((unsigned)x >= (unsigned)n) { 1.27 + if (x < 0) { 1.28 + x = n + ~(~x % n); 1.29 + } else { 1.30 + x = x % n; 1.31 + } 1.32 + } 1.33 + return x; 1.34 +} 1.35 + 1.36 +void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count); 1.37 +void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count); 1.38 + 1.39 +// Compile neon code paths if needed 1.40 +#if !SK_ARM_NEON_IS_NONE 1.41 + 1.42 +// These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp 1.43 +extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[]; 1.44 +extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[]; 1.45 + 1.46 +#endif // !SK_ARM_NEON_IS_NONE 1.47 + 1.48 +// Compile non-neon code path if needed 1.49 +#if !SK_ARM_NEON_IS_ALWAYS 1.50 +#define MAKENAME(suffix) ClampX_ClampY ## suffix 1.51 +#define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max) 1.52 +#define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max) 1.53 +#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF) 1.54 +#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF) 1.55 +#define CHECK_FOR_DECAL 1.56 +#include "SkBitmapProcState_matrix.h" 1.57 + 1.58 +#define MAKENAME(suffix) RepeatX_RepeatY ## suffix 1.59 +#define TILEX_PROCF(fx, max) SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1)) 1.60 +#define TILEY_PROCF(fy, max) SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1)) 1.61 +#define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF) 1.62 +#define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF) 1.63 +#include "SkBitmapProcState_matrix.h" 1.64 +#endif 1.65 + 1.66 +#define MAKENAME(suffix) GeneralXY ## suffix 1.67 +#define PREAMBLE(state) SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \ 1.68 + SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \ 1.69 + SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \ 1.70 + SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY 1.71 +#define PREAMBLE_PARAM_X , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX 1.72 +#define PREAMBLE_PARAM_Y , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY 1.73 +#define PREAMBLE_ARG_X , tileProcX, tileLowBitsProcX 1.74 +#define PREAMBLE_ARG_Y , tileProcY, tileLowBitsProcY 1.75 +#define TILEX_PROCF(fx, max) SK_USHIFT16(tileProcX(fx) * ((max) + 1)) 1.76 +#define TILEY_PROCF(fy, max) SK_USHIFT16(tileProcY(fy) * ((max) + 1)) 1.77 +#define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1) 1.78 +#define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1) 1.79 +#include "SkBitmapProcState_matrix.h" 1.80 + 1.81 +static inline U16CPU fixed_clamp(SkFixed x) 1.82 +{ 1.83 + if (x < 0) { 1.84 + x = 0; 1.85 + } 1.86 + if (x >> 16) { 1.87 + x = 0xFFFF; 1.88 + } 1.89 + return x; 1.90 +} 1.91 + 1.92 +static inline U16CPU fixed_repeat(SkFixed x) 1.93 +{ 1.94 + return x & 0xFFFF; 1.95 +} 1.96 + 1.97 +// Visual Studio 2010 (MSC_VER=1600) optimizes bit-shift code incorrectly. 1.98 +// See http://code.google.com/p/skia/issues/detail?id=472 1.99 +#if defined(_MSC_VER) && (_MSC_VER >= 1600) 1.100 +#pragma optimize("", off) 1.101 +#endif 1.102 + 1.103 +static inline U16CPU fixed_mirror(SkFixed x) 1.104 +{ 1.105 + SkFixed s = x << 15 >> 31; 1.106 + // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval 1.107 + return (x ^ s) & 0xFFFF; 1.108 +} 1.109 + 1.110 +#if defined(_MSC_VER) && (_MSC_VER >= 1600) 1.111 +#pragma optimize("", on) 1.112 +#endif 1.113 + 1.114 +static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m) 1.115 +{ 1.116 + if (SkShader::kClamp_TileMode == m) 1.117 + return fixed_clamp; 1.118 + if (SkShader::kRepeat_TileMode == m) 1.119 + return fixed_repeat; 1.120 + SkASSERT(SkShader::kMirror_TileMode == m); 1.121 + return fixed_mirror; 1.122 +} 1.123 + 1.124 +static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) { 1.125 + return (x >> 12) & 0xF; 1.126 +} 1.127 + 1.128 +static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) { 1.129 + return ((x * scale) >> 12) & 0xF; 1.130 +} 1.131 + 1.132 +static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) { 1.133 + if (SkShader::kClamp_TileMode == m) { 1.134 + return fixed_clamp_lowbits; 1.135 + } else { 1.136 + SkASSERT(SkShader::kMirror_TileMode == m || 1.137 + SkShader::kRepeat_TileMode == m); 1.138 + // mirror and repeat have the same behavior for the low bits. 1.139 + return fixed_repeat_or_mirrow_lowbits; 1.140 + } 1.141 +} 1.142 + 1.143 +static inline U16CPU int_clamp(int x, int n) { 1.144 + if (x >= n) { 1.145 + x = n - 1; 1.146 + } 1.147 + if (x < 0) { 1.148 + x = 0; 1.149 + } 1.150 + return x; 1.151 +} 1.152 + 1.153 +static inline U16CPU int_repeat(int x, int n) { 1.154 + return sk_int_mod(x, n); 1.155 +} 1.156 + 1.157 +static inline U16CPU int_mirror(int x, int n) { 1.158 + x = sk_int_mod(x, 2 * n); 1.159 + if (x >= n) { 1.160 + x = n + ~(x - n); 1.161 + } 1.162 + return x; 1.163 +} 1.164 + 1.165 +#if 0 1.166 +static void test_int_tileprocs() { 1.167 + for (int i = -8; i <= 8; i++) { 1.168 + SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3)); 1.169 + } 1.170 +} 1.171 +#endif 1.172 + 1.173 +static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) { 1.174 + if (SkShader::kClamp_TileMode == tm) 1.175 + return int_clamp; 1.176 + if (SkShader::kRepeat_TileMode == tm) 1.177 + return int_repeat; 1.178 + SkASSERT(SkShader::kMirror_TileMode == tm); 1.179 + return int_mirror; 1.180 +} 1.181 + 1.182 +////////////////////////////////////////////////////////////////////////////// 1.183 + 1.184 +void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) 1.185 +{ 1.186 + int i; 1.187 + 1.188 + for (i = (count >> 2); i > 0; --i) 1.189 + { 1.190 + *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16); 1.191 + fx += dx+dx; 1.192 + *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16); 1.193 + fx += dx+dx; 1.194 + } 1.195 + count &= 3; 1.196 + 1.197 + uint16_t* xx = (uint16_t*)dst; 1.198 + for (i = count; i > 0; --i) { 1.199 + *xx++ = SkToU16(fx >> 16); fx += dx; 1.200 + } 1.201 +} 1.202 + 1.203 +void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) 1.204 +{ 1.205 + 1.206 + 1.207 + if (count & 1) 1.208 + { 1.209 + SkASSERT((fx >> (16 + 14)) == 0); 1.210 + *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 1.211 + fx += dx; 1.212 + } 1.213 + while ((count -= 2) >= 0) 1.214 + { 1.215 + SkASSERT((fx >> (16 + 14)) == 0); 1.216 + *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 1.217 + fx += dx; 1.218 + 1.219 + *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 1.220 + fx += dx; 1.221 + } 1.222 +} 1.223 + 1.224 +/////////////////////////////////////////////////////////////////////////////// 1.225 +// stores the same as SCALE, but is cheaper to compute. Also since there is no 1.226 +// scale, we don't need/have a FILTER version 1.227 + 1.228 +static void fill_sequential(uint16_t xptr[], int start, int count) { 1.229 +#if 1 1.230 + if (reinterpret_cast<intptr_t>(xptr) & 0x2) { 1.231 + *xptr++ = start++; 1.232 + count -= 1; 1.233 + } 1.234 + if (count > 3) { 1.235 + uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr); 1.236 + uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1); 1.237 + uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3); 1.238 + start += count & ~3; 1.239 + int qcount = count >> 2; 1.240 + do { 1.241 + *xxptr++ = pattern0; 1.242 + pattern0 += 0x40004; 1.243 + *xxptr++ = pattern1; 1.244 + pattern1 += 0x40004; 1.245 + } while (--qcount != 0); 1.246 + xptr = reinterpret_cast<uint16_t*>(xxptr); 1.247 + count &= 3; 1.248 + } 1.249 + while (--count >= 0) { 1.250 + *xptr++ = start++; 1.251 + } 1.252 +#else 1.253 + for (int i = 0; i < count; i++) { 1.254 + *xptr++ = start++; 1.255 + } 1.256 +#endif 1.257 +} 1.258 + 1.259 +static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy, 1.260 + int x, int y) { 1.261 + SkPoint pt; 1.262 + s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf, 1.263 + SkIntToScalar(y) + SK_ScalarHalf, &pt); 1.264 + **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16, 1.265 + s.fBitmap->height()); 1.266 + *xy += 1; // bump the ptr 1.267 + // return our starting X position 1.268 + return SkScalarToFixed(pt.fX) >> 16; 1.269 +} 1.270 + 1.271 +static void clampx_nofilter_trans(const SkBitmapProcState& s, 1.272 + uint32_t xy[], int count, int x, int y) { 1.273 + SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 1.274 + 1.275 + int xpos = nofilter_trans_preamble(s, &xy, x, y); 1.276 + const int width = s.fBitmap->width(); 1.277 + if (1 == width) { 1.278 + // all of the following X values must be 0 1.279 + memset(xy, 0, count * sizeof(uint16_t)); 1.280 + return; 1.281 + } 1.282 + 1.283 + uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 1.284 + int n; 1.285 + 1.286 + // fill before 0 as needed 1.287 + if (xpos < 0) { 1.288 + n = -xpos; 1.289 + if (n > count) { 1.290 + n = count; 1.291 + } 1.292 + memset(xptr, 0, n * sizeof(uint16_t)); 1.293 + count -= n; 1.294 + if (0 == count) { 1.295 + return; 1.296 + } 1.297 + xptr += n; 1.298 + xpos = 0; 1.299 + } 1.300 + 1.301 + // fill in 0..width-1 if needed 1.302 + if (xpos < width) { 1.303 + n = width - xpos; 1.304 + if (n > count) { 1.305 + n = count; 1.306 + } 1.307 + fill_sequential(xptr, xpos, n); 1.308 + count -= n; 1.309 + if (0 == count) { 1.310 + return; 1.311 + } 1.312 + xptr += n; 1.313 + } 1.314 + 1.315 + // fill the remaining with the max value 1.316 + sk_memset16(xptr, width - 1, count); 1.317 +} 1.318 + 1.319 +static void repeatx_nofilter_trans(const SkBitmapProcState& s, 1.320 + uint32_t xy[], int count, int x, int y) { 1.321 + SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 1.322 + 1.323 + int xpos = nofilter_trans_preamble(s, &xy, x, y); 1.324 + const int width = s.fBitmap->width(); 1.325 + if (1 == width) { 1.326 + // all of the following X values must be 0 1.327 + memset(xy, 0, count * sizeof(uint16_t)); 1.328 + return; 1.329 + } 1.330 + 1.331 + uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 1.332 + int start = sk_int_mod(xpos, width); 1.333 + int n = width - start; 1.334 + if (n > count) { 1.335 + n = count; 1.336 + } 1.337 + fill_sequential(xptr, start, n); 1.338 + xptr += n; 1.339 + count -= n; 1.340 + 1.341 + while (count >= width) { 1.342 + fill_sequential(xptr, 0, width); 1.343 + xptr += width; 1.344 + count -= width; 1.345 + } 1.346 + 1.347 + if (count > 0) { 1.348 + fill_sequential(xptr, 0, count); 1.349 + } 1.350 +} 1.351 + 1.352 +static void fill_backwards(uint16_t xptr[], int pos, int count) { 1.353 + for (int i = 0; i < count; i++) { 1.354 + SkASSERT(pos >= 0); 1.355 + xptr[i] = pos--; 1.356 + } 1.357 +} 1.358 + 1.359 +static void mirrorx_nofilter_trans(const SkBitmapProcState& s, 1.360 + uint32_t xy[], int count, int x, int y) { 1.361 + SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 1.362 + 1.363 + int xpos = nofilter_trans_preamble(s, &xy, x, y); 1.364 + const int width = s.fBitmap->width(); 1.365 + if (1 == width) { 1.366 + // all of the following X values must be 0 1.367 + memset(xy, 0, count * sizeof(uint16_t)); 1.368 + return; 1.369 + } 1.370 + 1.371 + uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 1.372 + // need to know our start, and our initial phase (forward or backward) 1.373 + bool forward; 1.374 + int n; 1.375 + int start = sk_int_mod(xpos, 2 * width); 1.376 + if (start >= width) { 1.377 + start = width + ~(start - width); 1.378 + forward = false; 1.379 + n = start + 1; // [start .. 0] 1.380 + } else { 1.381 + forward = true; 1.382 + n = width - start; // [start .. width) 1.383 + } 1.384 + if (n > count) { 1.385 + n = count; 1.386 + } 1.387 + if (forward) { 1.388 + fill_sequential(xptr, start, n); 1.389 + } else { 1.390 + fill_backwards(xptr, start, n); 1.391 + } 1.392 + forward = !forward; 1.393 + xptr += n; 1.394 + count -= n; 1.395 + 1.396 + while (count >= width) { 1.397 + if (forward) { 1.398 + fill_sequential(xptr, 0, width); 1.399 + } else { 1.400 + fill_backwards(xptr, width - 1, width); 1.401 + } 1.402 + forward = !forward; 1.403 + xptr += width; 1.404 + count -= width; 1.405 + } 1.406 + 1.407 + if (count > 0) { 1.408 + if (forward) { 1.409 + fill_sequential(xptr, 0, count); 1.410 + } else { 1.411 + fill_backwards(xptr, width - 1, count); 1.412 + } 1.413 + } 1.414 +} 1.415 + 1.416 +/////////////////////////////////////////////////////////////////////////////// 1.417 + 1.418 +SkBitmapProcState::MatrixProc 1.419 +SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) { 1.420 +// test_int_tileprocs(); 1.421 + // check for our special case when there is no scale/affine/perspective 1.422 + if (trivial_matrix) { 1.423 + SkASSERT(SkPaint::kNone_FilterLevel == fFilterLevel); 1.424 + fIntTileProcY = choose_int_tile_proc(fTileModeY); 1.425 + switch (fTileModeX) { 1.426 + case SkShader::kClamp_TileMode: 1.427 + return clampx_nofilter_trans; 1.428 + case SkShader::kRepeat_TileMode: 1.429 + return repeatx_nofilter_trans; 1.430 + case SkShader::kMirror_TileMode: 1.431 + return mirrorx_nofilter_trans; 1.432 + } 1.433 + } 1.434 + 1.435 + int index = 0; 1.436 + if (fFilterLevel != SkPaint::kNone_FilterLevel) { 1.437 + index = 1; 1.438 + } 1.439 + if (fInvType & SkMatrix::kPerspective_Mask) { 1.440 + index += 4; 1.441 + } else if (fInvType & SkMatrix::kAffine_Mask) { 1.442 + index += 2; 1.443 + } 1.444 + 1.445 + if (SkShader::kClamp_TileMode == fTileModeX && 1.446 + SkShader::kClamp_TileMode == fTileModeY) 1.447 + { 1.448 + // clamp gets special version of filterOne 1.449 + fFilterOneX = SK_Fixed1; 1.450 + fFilterOneY = SK_Fixed1; 1.451 + return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index]; 1.452 + } 1.453 + 1.454 + // all remaining procs use this form for filterOne 1.455 + fFilterOneX = SK_Fixed1 / fBitmap->width(); 1.456 + fFilterOneY = SK_Fixed1 / fBitmap->height(); 1.457 + 1.458 + if (SkShader::kRepeat_TileMode == fTileModeX && 1.459 + SkShader::kRepeat_TileMode == fTileModeY) 1.460 + { 1.461 + return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index]; 1.462 + } 1.463 + 1.464 + fTileProcX = choose_tile_proc(fTileModeX); 1.465 + fTileProcY = choose_tile_proc(fTileModeY); 1.466 + fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX); 1.467 + fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY); 1.468 + return GeneralXY_Procs[index]; 1.469 +}