gfx/skia/trunk/src/core/SkBitmapProcState_matrixProcs.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/skia/trunk/src/core/SkBitmapProcState_matrixProcs.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,466 @@
     1.4 +/* NEON optimized code (C) COPYRIGHT 2009 Motorola
     1.5 + *
     1.6 + * Use of this source code is governed by a BSD-style license that can be
     1.7 + * found in the LICENSE file.
     1.8 + */
     1.9 +
    1.10 +#include "SkBitmapProcState.h"
    1.11 +#include "SkPerspIter.h"
    1.12 +#include "SkShader.h"
    1.13 +#include "SkUtils.h"
    1.14 +#include "SkUtilsArm.h"
    1.15 +#include "SkBitmapProcState_utils.h"
    1.16 +
    1.17 +/*  returns 0...(n-1) given any x (positive or negative).
    1.18 +
    1.19 +    As an example, if n (which is always positive) is 5...
    1.20 +
    1.21 +          x: -8 -7 -6 -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7  8
    1.22 +    returns:  2  3  4  0  1  2  3  4  0  1  2  3  4  0  1  2  3
    1.23 + */
    1.24 +static inline int sk_int_mod(int x, int n) {
    1.25 +    SkASSERT(n > 0);
    1.26 +    if ((unsigned)x >= (unsigned)n) {
    1.27 +        if (x < 0) {
    1.28 +            x = n + ~(~x % n);
    1.29 +        } else {
    1.30 +            x = x % n;
    1.31 +        }
    1.32 +    }
    1.33 +    return x;
    1.34 +}
    1.35 +
    1.36 +void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
    1.37 +void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
    1.38 +
    1.39 +// Compile neon code paths if needed
    1.40 +#if !SK_ARM_NEON_IS_NONE
    1.41 +
    1.42 +// These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp
    1.43 +extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
    1.44 +extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];
    1.45 +
    1.46 +#endif // !SK_ARM_NEON_IS_NONE
    1.47 +
    1.48 +// Compile non-neon code path if needed
    1.49 +#if !SK_ARM_NEON_IS_ALWAYS
    1.50 +#define MAKENAME(suffix)        ClampX_ClampY ## suffix
    1.51 +#define TILEX_PROCF(fx, max)    SkClampMax((fx) >> 16, max)
    1.52 +#define TILEY_PROCF(fy, max)    SkClampMax((fy) >> 16, max)
    1.53 +#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
    1.54 +#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
    1.55 +#define CHECK_FOR_DECAL
    1.56 +#include "SkBitmapProcState_matrix.h"
    1.57 +
    1.58 +#define MAKENAME(suffix)        RepeatX_RepeatY ## suffix
    1.59 +#define TILEX_PROCF(fx, max)    SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1))
    1.60 +#define TILEY_PROCF(fy, max)    SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1))
    1.61 +#define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
    1.62 +#define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
    1.63 +#include "SkBitmapProcState_matrix.h"
    1.64 +#endif
    1.65 +
    1.66 +#define MAKENAME(suffix)        GeneralXY ## suffix
    1.67 +#define PREAMBLE(state)         SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \
    1.68 +                                SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \
    1.69 +                                SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \
    1.70 +                                SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY
    1.71 +#define PREAMBLE_PARAM_X        , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX
    1.72 +#define PREAMBLE_PARAM_Y        , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY
    1.73 +#define PREAMBLE_ARG_X          , tileProcX, tileLowBitsProcX
    1.74 +#define PREAMBLE_ARG_Y          , tileProcY, tileLowBitsProcY
    1.75 +#define TILEX_PROCF(fx, max)    SK_USHIFT16(tileProcX(fx) * ((max) + 1))
    1.76 +#define TILEY_PROCF(fy, max)    SK_USHIFT16(tileProcY(fy) * ((max) + 1))
    1.77 +#define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1)
    1.78 +#define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1)
    1.79 +#include "SkBitmapProcState_matrix.h"
    1.80 +
    1.81 +static inline U16CPU fixed_clamp(SkFixed x)
    1.82 +{
    1.83 +    if (x < 0) {
    1.84 +        x = 0;
    1.85 +    }
    1.86 +    if (x >> 16) {
    1.87 +        x = 0xFFFF;
    1.88 +    }
    1.89 +    return x;
    1.90 +}
    1.91 +
    1.92 +static inline U16CPU fixed_repeat(SkFixed x)
    1.93 +{
    1.94 +    return x & 0xFFFF;
    1.95 +}
    1.96 +
    1.97 +// Visual Studio 2010 (MSC_VER=1600) optimizes bit-shift code incorrectly.
    1.98 +// See http://code.google.com/p/skia/issues/detail?id=472
    1.99 +#if defined(_MSC_VER) && (_MSC_VER >= 1600)
   1.100 +#pragma optimize("", off)
   1.101 +#endif
   1.102 +
   1.103 +static inline U16CPU fixed_mirror(SkFixed x)
   1.104 +{
   1.105 +    SkFixed s = x << 15 >> 31;
   1.106 +    // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval
   1.107 +    return (x ^ s) & 0xFFFF;
   1.108 +}
   1.109 +
   1.110 +#if defined(_MSC_VER) && (_MSC_VER >= 1600)
   1.111 +#pragma optimize("", on)
   1.112 +#endif
   1.113 +
   1.114 +static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m)
   1.115 +{
   1.116 +    if (SkShader::kClamp_TileMode == m)
   1.117 +        return fixed_clamp;
   1.118 +    if (SkShader::kRepeat_TileMode == m)
   1.119 +        return fixed_repeat;
   1.120 +    SkASSERT(SkShader::kMirror_TileMode == m);
   1.121 +    return fixed_mirror;
   1.122 +}
   1.123 +
   1.124 +static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) {
   1.125 +    return (x >> 12) & 0xF;
   1.126 +}
   1.127 +
   1.128 +static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) {
   1.129 +    return ((x * scale) >> 12) & 0xF;
   1.130 +}
   1.131 +
   1.132 +static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) {
   1.133 +    if (SkShader::kClamp_TileMode == m) {
   1.134 +        return fixed_clamp_lowbits;
   1.135 +    } else {
   1.136 +        SkASSERT(SkShader::kMirror_TileMode == m ||
   1.137 +                 SkShader::kRepeat_TileMode == m);
   1.138 +        // mirror and repeat have the same behavior for the low bits.
   1.139 +        return fixed_repeat_or_mirrow_lowbits;
   1.140 +    }
   1.141 +}
   1.142 +
   1.143 +static inline U16CPU int_clamp(int x, int n) {
   1.144 +    if (x >= n) {
   1.145 +        x = n - 1;
   1.146 +    }
   1.147 +    if (x < 0) {
   1.148 +        x = 0;
   1.149 +    }
   1.150 +    return x;
   1.151 +}
   1.152 +
   1.153 +static inline U16CPU int_repeat(int x, int n) {
   1.154 +    return sk_int_mod(x, n);
   1.155 +}
   1.156 +
   1.157 +static inline U16CPU int_mirror(int x, int n) {
   1.158 +    x = sk_int_mod(x, 2 * n);
   1.159 +    if (x >= n) {
   1.160 +        x = n + ~(x - n);
   1.161 +    }
   1.162 +    return x;
   1.163 +}
   1.164 +
   #if 0
   // Compiled-out debug helper: prints int_mirror(i, 3) for i in -8..8.
   static void test_int_tileprocs() {
       int i = -8;
       while (i <= 8) {
           SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3));
           ++i;
       }
   }
   #endif
   1.172 +
   1.173 +static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) {
   1.174 +    if (SkShader::kClamp_TileMode == tm)
   1.175 +        return int_clamp;
   1.176 +    if (SkShader::kRepeat_TileMode == tm)
   1.177 +        return int_repeat;
   1.178 +    SkASSERT(SkShader::kMirror_TileMode == tm);
   1.179 +    return int_mirror;
   1.180 +}
   1.181 +
   1.182 +//////////////////////////////////////////////////////////////////////////////
   1.183 +
   1.184 +void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
   1.185 +{
   1.186 +    int i;
   1.187 +
   1.188 +    for (i = (count >> 2); i > 0; --i)
   1.189 +    {
   1.190 +        *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
   1.191 +        fx += dx+dx;
   1.192 +        *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
   1.193 +        fx += dx+dx;
   1.194 +    }
   1.195 +    count &= 3;
   1.196 +
   1.197 +    uint16_t* xx = (uint16_t*)dst;
   1.198 +    for (i = count; i > 0; --i) {
   1.199 +        *xx++ = SkToU16(fx >> 16); fx += dx;
   1.200 +    }
   1.201 +}
   1.202 +
   1.203 +void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
   1.204 +{
   1.205 +
   1.206 +
   1.207 +    if (count & 1)
   1.208 +    {
   1.209 +        SkASSERT((fx >> (16 + 14)) == 0);
   1.210 +        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
   1.211 +        fx += dx;
   1.212 +    }
   1.213 +    while ((count -= 2) >= 0)
   1.214 +    {
   1.215 +        SkASSERT((fx >> (16 + 14)) == 0);
   1.216 +        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
   1.217 +        fx += dx;
   1.218 +
   1.219 +        *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
   1.220 +        fx += dx;
   1.221 +    }
   1.222 +}
   1.223 +
   1.224 +///////////////////////////////////////////////////////////////////////////////
   1.225 +// stores the same as SCALE, but is cheaper to compute. Also since there is no
   1.226 +// scale, we don't need/have a FILTER version
   1.227 +
   1.228 +static void fill_sequential(uint16_t xptr[], int start, int count) {
   1.229 +#if 1
   1.230 +    if (reinterpret_cast<intptr_t>(xptr) & 0x2) {
   1.231 +        *xptr++ = start++;
   1.232 +        count -= 1;
   1.233 +    }
   1.234 +    if (count > 3) {
   1.235 +        uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr);
   1.236 +        uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1);
   1.237 +        uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3);
   1.238 +        start += count & ~3;
   1.239 +        int qcount = count >> 2;
   1.240 +        do {
   1.241 +            *xxptr++ = pattern0;
   1.242 +            pattern0 += 0x40004;
   1.243 +            *xxptr++ = pattern1;
   1.244 +            pattern1 += 0x40004;
   1.245 +        } while (--qcount != 0);
   1.246 +        xptr = reinterpret_cast<uint16_t*>(xxptr);
   1.247 +        count &= 3;
   1.248 +    }
   1.249 +    while (--count >= 0) {
   1.250 +        *xptr++ = start++;
   1.251 +    }
   1.252 +#else
   1.253 +    for (int i = 0; i < count; i++) {
   1.254 +        *xptr++ = start++;
   1.255 +    }
   1.256 +#endif
   1.257 +}
   1.258 +
   1.259 +static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
   1.260 +                                   int x, int y) {
   1.261 +    SkPoint pt;
   1.262 +    s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
   1.263 +               SkIntToScalar(y) + SK_ScalarHalf, &pt);
   1.264 +    **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16,
   1.265 +                           s.fBitmap->height());
   1.266 +    *xy += 1;   // bump the ptr
   1.267 +    // return our starting X position
   1.268 +    return SkScalarToFixed(pt.fX) >> 16;
   1.269 +}
   1.270 +
   1.271 +static void clampx_nofilter_trans(const SkBitmapProcState& s,
   1.272 +                                  uint32_t xy[], int count, int x, int y) {
   1.273 +    SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
   1.274 +
   1.275 +    int xpos = nofilter_trans_preamble(s, &xy, x, y);
   1.276 +    const int width = s.fBitmap->width();
   1.277 +    if (1 == width) {
   1.278 +        // all of the following X values must be 0
   1.279 +        memset(xy, 0, count * sizeof(uint16_t));
   1.280 +        return;
   1.281 +    }
   1.282 +
   1.283 +    uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
   1.284 +    int n;
   1.285 +
   1.286 +    // fill before 0 as needed
   1.287 +    if (xpos < 0) {
   1.288 +        n = -xpos;
   1.289 +        if (n > count) {
   1.290 +            n = count;
   1.291 +        }
   1.292 +        memset(xptr, 0, n * sizeof(uint16_t));
   1.293 +        count -= n;
   1.294 +        if (0 == count) {
   1.295 +            return;
   1.296 +        }
   1.297 +        xptr += n;
   1.298 +        xpos = 0;
   1.299 +    }
   1.300 +
   1.301 +    // fill in 0..width-1 if needed
   1.302 +    if (xpos < width) {
   1.303 +        n = width - xpos;
   1.304 +        if (n > count) {
   1.305 +            n = count;
   1.306 +        }
   1.307 +        fill_sequential(xptr, xpos, n);
   1.308 +        count -= n;
   1.309 +        if (0 == count) {
   1.310 +            return;
   1.311 +        }
   1.312 +        xptr += n;
   1.313 +    }
   1.314 +
   1.315 +    // fill the remaining with the max value
   1.316 +    sk_memset16(xptr, width - 1, count);
   1.317 +}
   1.318 +
   1.319 +static void repeatx_nofilter_trans(const SkBitmapProcState& s,
   1.320 +                                   uint32_t xy[], int count, int x, int y) {
   1.321 +    SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
   1.322 +
   1.323 +    int xpos = nofilter_trans_preamble(s, &xy, x, y);
   1.324 +    const int width = s.fBitmap->width();
   1.325 +    if (1 == width) {
   1.326 +        // all of the following X values must be 0
   1.327 +        memset(xy, 0, count * sizeof(uint16_t));
   1.328 +        return;
   1.329 +    }
   1.330 +
   1.331 +    uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
   1.332 +    int start = sk_int_mod(xpos, width);
   1.333 +    int n = width - start;
   1.334 +    if (n > count) {
   1.335 +        n = count;
   1.336 +    }
   1.337 +    fill_sequential(xptr, start, n);
   1.338 +    xptr += n;
   1.339 +    count -= n;
   1.340 +
   1.341 +    while (count >= width) {
   1.342 +        fill_sequential(xptr, 0, width);
   1.343 +        xptr += width;
   1.344 +        count -= width;
   1.345 +    }
   1.346 +
   1.347 +    if (count > 0) {
   1.348 +        fill_sequential(xptr, 0, count);
   1.349 +    }
   1.350 +}
   1.351 +
   1.352 +static void fill_backwards(uint16_t xptr[], int pos, int count) {
   1.353 +    for (int i = 0; i < count; i++) {
   1.354 +        SkASSERT(pos >= 0);
   1.355 +        xptr[i] = pos--;
   1.356 +    }
   1.357 +}
   1.358 +
   1.359 +static void mirrorx_nofilter_trans(const SkBitmapProcState& s,
   1.360 +                                   uint32_t xy[], int count, int x, int y) {
   1.361 +    SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
   1.362 +
   1.363 +    int xpos = nofilter_trans_preamble(s, &xy, x, y);
   1.364 +    const int width = s.fBitmap->width();
   1.365 +    if (1 == width) {
   1.366 +        // all of the following X values must be 0
   1.367 +        memset(xy, 0, count * sizeof(uint16_t));
   1.368 +        return;
   1.369 +    }
   1.370 +
   1.371 +    uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
   1.372 +    // need to know our start, and our initial phase (forward or backward)
   1.373 +    bool forward;
   1.374 +    int n;
   1.375 +    int start = sk_int_mod(xpos, 2 * width);
   1.376 +    if (start >= width) {
   1.377 +        start = width + ~(start - width);
   1.378 +        forward = false;
   1.379 +        n = start + 1;  // [start .. 0]
   1.380 +    } else {
   1.381 +        forward = true;
   1.382 +        n = width - start;  // [start .. width)
   1.383 +    }
   1.384 +    if (n > count) {
   1.385 +        n = count;
   1.386 +    }
   1.387 +    if (forward) {
   1.388 +        fill_sequential(xptr, start, n);
   1.389 +    } else {
   1.390 +        fill_backwards(xptr, start, n);
   1.391 +    }
   1.392 +    forward = !forward;
   1.393 +    xptr += n;
   1.394 +    count -= n;
   1.395 +
   1.396 +    while (count >= width) {
   1.397 +        if (forward) {
   1.398 +            fill_sequential(xptr, 0, width);
   1.399 +        } else {
   1.400 +            fill_backwards(xptr, width - 1, width);
   1.401 +        }
   1.402 +        forward = !forward;
   1.403 +        xptr += width;
   1.404 +        count -= width;
   1.405 +    }
   1.406 +
   1.407 +    if (count > 0) {
   1.408 +        if (forward) {
   1.409 +            fill_sequential(xptr, 0, count);
   1.410 +        } else {
   1.411 +            fill_backwards(xptr, width - 1, count);
   1.412 +        }
   1.413 +    }
   1.414 +}
   1.415 +
   1.416 +///////////////////////////////////////////////////////////////////////////////
   1.417 +
   1.418 +SkBitmapProcState::MatrixProc
   1.419 +SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
   1.420 +//    test_int_tileprocs();
   1.421 +    // check for our special case when there is no scale/affine/perspective
   1.422 +    if (trivial_matrix) {
   1.423 +        SkASSERT(SkPaint::kNone_FilterLevel == fFilterLevel);
   1.424 +        fIntTileProcY = choose_int_tile_proc(fTileModeY);
   1.425 +        switch (fTileModeX) {
   1.426 +            case SkShader::kClamp_TileMode:
   1.427 +                return clampx_nofilter_trans;
   1.428 +            case SkShader::kRepeat_TileMode:
   1.429 +                return repeatx_nofilter_trans;
   1.430 +            case SkShader::kMirror_TileMode:
   1.431 +                return mirrorx_nofilter_trans;
   1.432 +        }
   1.433 +    }
   1.434 +
   1.435 +    int index = 0;
   1.436 +    if (fFilterLevel != SkPaint::kNone_FilterLevel) {
   1.437 +        index = 1;
   1.438 +    }
   1.439 +    if (fInvType & SkMatrix::kPerspective_Mask) {
   1.440 +        index += 4;
   1.441 +    } else if (fInvType & SkMatrix::kAffine_Mask) {
   1.442 +        index += 2;
   1.443 +    }
   1.444 +
   1.445 +    if (SkShader::kClamp_TileMode == fTileModeX &&
   1.446 +        SkShader::kClamp_TileMode == fTileModeY)
   1.447 +    {
   1.448 +        // clamp gets special version of filterOne
   1.449 +        fFilterOneX = SK_Fixed1;
   1.450 +        fFilterOneY = SK_Fixed1;
   1.451 +        return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index];
   1.452 +    }
   1.453 +
   1.454 +    // all remaining procs use this form for filterOne
   1.455 +    fFilterOneX = SK_Fixed1 / fBitmap->width();
   1.456 +    fFilterOneY = SK_Fixed1 / fBitmap->height();
   1.457 +
   1.458 +    if (SkShader::kRepeat_TileMode == fTileModeX &&
   1.459 +        SkShader::kRepeat_TileMode == fTileModeY)
   1.460 +    {
   1.461 +        return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index];
   1.462 +    }
   1.463 +
   1.464 +    fTileProcX = choose_tile_proc(fTileModeX);
   1.465 +    fTileProcY = choose_tile_proc(fTileModeY);
   1.466 +    fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX);
   1.467 +    fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY);
   1.468 +    return GeneralXY_Procs[index];
   1.469 +}

mercurial