gfx/skia/trunk/src/opts/SkBitmapProcState_matrix_neon.h

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double key logic according to private browsing
mode or the privacy.thirdparty.isolate preference, and implement it in
GetCookieStringCommon and FindCookie, where it counts.
Some reservations remain about how to convince FindCookie users to test
the condition and pass a nullptr when disabling double key logic.

michael@0 1
michael@0 2 #include <arm_neon.h>
michael@0 3
michael@0 4
// Names of the six matrix procs defined below. Each is built with MAKENAME,
// which is not defined in this file and must already be defined when these
// names are expanded.
michael@0 5 #define SCALE_NOFILTER_NAME MAKENAME(_nofilter_scale)
michael@0 6 #define SCALE_FILTER_NAME MAKENAME(_filter_scale)
michael@0 7 #define AFFINE_NOFILTER_NAME MAKENAME(_nofilter_affine)
michael@0 8 #define AFFINE_FILTER_NAME MAKENAME(_filter_affine)
michael@0 9 #define PERSP_NOFILTER_NAME MAKENAME(_nofilter_persp)
michael@0 10 #define PERSP_FILTER_NAME MAKENAME(_filter_persp)
michael@0 11
// Names of the helpers that pack filter coordinates: scalar (one value) and
// 4-wide NEON variants, for X and for Y.
michael@0 12 #define PACK_FILTER_X_NAME MAKENAME(_pack_filter_x)
michael@0 13 #define PACK_FILTER_Y_NAME MAKENAME(_pack_filter_y)
michael@0 14 #define PACK_FILTER_X4_NAME MAKENAME(_pack_filter_x4)
michael@0 15 #define PACK_FILTER_Y4_NAME MAKENAME(_pack_filter_y4)
michael@0 16
// If no PREAMBLE was defined beforehand, make all five hooks expand to nothing:
// PREAMBLE(state) is invoked at the top of every proc, PREAMBLE_PARAM_* are
// appended to the pack helpers' parameter lists, and PREAMBLE_ARG_* are
// appended to the matching call sites.
michael@0 17 #ifndef PREAMBLE
michael@0 18 #define PREAMBLE(state)
michael@0 19 #define PREAMBLE_PARAM_X
michael@0 20 #define PREAMBLE_PARAM_Y
michael@0 21 #define PREAMBLE_ARG_X
michael@0 22 #define PREAMBLE_ARG_Y
michael@0 23 #endif
michael@0 24
michael@0 25 static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s,
michael@0 26 uint32_t xy[], int count, int x, int y) {
michael@0 27 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
michael@0 28 SkMatrix::kScale_Mask)) == 0);
michael@0 29
michael@0 30 PREAMBLE(s);
michael@0 31
michael@0 32 // we store y, x, x, x, x, x
michael@0 33 const unsigned maxX = s.fBitmap->width() - 1;
michael@0 34 SkFractionalInt fx;
michael@0 35 {
michael@0 36 SkPoint pt;
michael@0 37 s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
michael@0 38 SkIntToScalar(y) + SK_ScalarHalf, &pt);
michael@0 39 fx = SkScalarToFractionalInt(pt.fY);
michael@0 40 const unsigned maxY = s.fBitmap->height() - 1;
michael@0 41 *xy++ = TILEY_PROCF(SkFractionalIntToFixed(fx), maxY);
michael@0 42 fx = SkScalarToFractionalInt(pt.fX);
michael@0 43 }
michael@0 44
michael@0 45 if (0 == maxX) {
michael@0 46 // all of the following X values must be 0
michael@0 47 memset(xy, 0, count * sizeof(uint16_t));
michael@0 48 return;
michael@0 49 }
michael@0 50
michael@0 51 const SkFractionalInt dx = s.fInvSxFractionalInt;
michael@0 52
michael@0 53 #ifdef CHECK_FOR_DECAL
michael@0 54 // test if we don't need to apply the tile proc
michael@0 55 if (can_truncate_to_fixed_for_decal(fx, dx, count, maxX)) {
michael@0 56 decal_nofilter_scale_neon(xy, SkFractionalIntToFixed(fx),
michael@0 57 SkFractionalIntToFixed(dx), count);
michael@0 58 return;
michael@0 59 }
michael@0 60 #endif
michael@0 61
michael@0 62 if (count >= 8) {
michael@0 63 SkFractionalInt dx2 = dx+dx;
michael@0 64 SkFractionalInt dx4 = dx2+dx2;
michael@0 65 SkFractionalInt dx8 = dx4+dx4;
michael@0 66
michael@0 67 // now build fx/fx+dx/fx+2dx/fx+3dx
michael@0 68 SkFractionalInt fx1, fx2, fx3;
michael@0 69 int32x4_t lbase, hbase;
michael@0 70 int16_t *dst16 = (int16_t *)xy;
michael@0 71
michael@0 72 fx1 = fx+dx;
michael@0 73 fx2 = fx1+dx;
michael@0 74 fx3 = fx2+dx;
michael@0 75
michael@0 76 lbase = vdupq_n_s32(SkFractionalIntToFixed(fx));
michael@0 77 lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx1), lbase, 1);
michael@0 78 lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx2), lbase, 2);
michael@0 79 lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx3), lbase, 3);
michael@0 80 hbase = vaddq_s32(lbase, vdupq_n_s32(SkFractionalIntToFixed(dx4)));
michael@0 81
michael@0 82 // store & bump
michael@0 83 while (count >= 8) {
michael@0 84
michael@0 85 int16x8_t fx8;
michael@0 86
michael@0 87 fx8 = TILEX_PROCF_NEON8(lbase, hbase, maxX);
michael@0 88
michael@0 89 vst1q_s16(dst16, fx8);
michael@0 90
michael@0 91 // but preserving base & on to the next
michael@0 92 lbase = vaddq_s32 (lbase, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
michael@0 93 hbase = vaddq_s32 (hbase, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
michael@0 94 dst16 += 8;
michael@0 95 count -= 8;
michael@0 96 fx += dx8;
michael@0 97 };
michael@0 98 xy = (uint32_t *) dst16;
michael@0 99 }
michael@0 100
michael@0 101 uint16_t* xx = (uint16_t*)xy;
michael@0 102 for (int i = count; i > 0; --i) {
michael@0 103 *xx++ = TILEX_PROCF(SkFractionalIntToFixed(fx), maxX);
michael@0 104 fx += dx;
michael@0 105 }
michael@0 106 }
michael@0 107
michael@0 108 static void AFFINE_NOFILTER_NAME(const SkBitmapProcState& s,
michael@0 109 uint32_t xy[], int count, int x, int y) {
michael@0 110 SkASSERT(s.fInvType & SkMatrix::kAffine_Mask);
michael@0 111 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
michael@0 112 SkMatrix::kScale_Mask |
michael@0 113 SkMatrix::kAffine_Mask)) == 0);
michael@0 114
michael@0 115 PREAMBLE(s);
michael@0 116 SkPoint srcPt;
michael@0 117 s.fInvProc(s.fInvMatrix,
michael@0 118 SkIntToScalar(x) + SK_ScalarHalf,
michael@0 119 SkIntToScalar(y) + SK_ScalarHalf, &srcPt);
michael@0 120
michael@0 121 SkFractionalInt fx = SkScalarToFractionalInt(srcPt.fX);
michael@0 122 SkFractionalInt fy = SkScalarToFractionalInt(srcPt.fY);
michael@0 123 SkFractionalInt dx = s.fInvSxFractionalInt;
michael@0 124 SkFractionalInt dy = s.fInvKyFractionalInt;
michael@0 125 int maxX = s.fBitmap->width() - 1;
michael@0 126 int maxY = s.fBitmap->height() - 1;
michael@0 127
michael@0 128 if (count >= 8) {
michael@0 129 SkFractionalInt dx4 = dx * 4;
michael@0 130 SkFractionalInt dy4 = dy * 4;
michael@0 131 SkFractionalInt dx8 = dx * 8;
michael@0 132 SkFractionalInt dy8 = dy * 8;
michael@0 133
michael@0 134 int32x4_t xbase, ybase;
michael@0 135 int32x4_t x2base, y2base;
michael@0 136 int16_t *dst16 = (int16_t *) xy;
michael@0 137
michael@0 138 // now build fx, fx+dx, fx+2dx, fx+3dx
michael@0 139 xbase = vdupq_n_s32(SkFractionalIntToFixed(fx));
michael@0 140 xbase = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx), xbase, 1);
michael@0 141 xbase = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx), xbase, 2);
michael@0 142 xbase = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx+dx), xbase, 3);
michael@0 143
michael@0 144 // same for fy
michael@0 145 ybase = vdupq_n_s32(SkFractionalIntToFixed(fy));
michael@0 146 ybase = vsetq_lane_s32(SkFractionalIntToFixed(fy+dy), ybase, 1);
michael@0 147 ybase = vsetq_lane_s32(SkFractionalIntToFixed(fy+dy+dy), ybase, 2);
michael@0 148 ybase = vsetq_lane_s32(SkFractionalIntToFixed(fy+dy+dy+dy), ybase, 3);
michael@0 149
michael@0 150 x2base = vaddq_s32(xbase, vdupq_n_s32(SkFractionalIntToFixed(dx4)));
michael@0 151 y2base = vaddq_s32(ybase, vdupq_n_s32(SkFractionalIntToFixed(dy4)));
michael@0 152
michael@0 153 // store & bump
michael@0 154 do {
michael@0 155 int16x8x2_t hi16;
michael@0 156
michael@0 157 hi16.val[0] = TILEX_PROCF_NEON8(xbase, x2base, maxX);
michael@0 158 hi16.val[1] = TILEY_PROCF_NEON8(ybase, y2base, maxY);
michael@0 159
michael@0 160 vst2q_s16(dst16, hi16);
michael@0 161
michael@0 162 // moving base and on to the next
michael@0 163 xbase = vaddq_s32(xbase, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
michael@0 164 ybase = vaddq_s32(ybase, vdupq_n_s32(SkFractionalIntToFixed(dy8)));
michael@0 165 x2base = vaddq_s32(x2base, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
michael@0 166 y2base = vaddq_s32(y2base, vdupq_n_s32(SkFractionalIntToFixed(dy8)));
michael@0 167
michael@0 168 dst16 += 16; // 8x32 aka 16x16
michael@0 169 count -= 8;
michael@0 170 fx += dx8;
michael@0 171 fy += dy8;
michael@0 172 } while (count >= 8);
michael@0 173 xy = (uint32_t *) dst16;
michael@0 174 }
michael@0 175
michael@0 176 for (int i = count; i > 0; --i) {
michael@0 177 *xy++ = (TILEY_PROCF(SkFractionalIntToFixed(fy), maxY) << 16) |
michael@0 178 TILEX_PROCF(SkFractionalIntToFixed(fx), maxX);
michael@0 179 fx += dx; fy += dy;
michael@0 180 }
michael@0 181 }
michael@0 182
michael@0 183 static void PERSP_NOFILTER_NAME(const SkBitmapProcState& s,
michael@0 184 uint32_t* SK_RESTRICT xy,
michael@0 185 int count, int x, int y) {
michael@0 186 SkASSERT(s.fInvType & SkMatrix::kPerspective_Mask);
michael@0 187
michael@0 188 PREAMBLE(s);
michael@0 189 // max{X,Y} are int here, but later shown/assumed to fit in 16 bits
michael@0 190 int maxX = s.fBitmap->width() - 1;
michael@0 191 int maxY = s.fBitmap->height() - 1;
michael@0 192
michael@0 193 SkPerspIter iter(s.fInvMatrix,
michael@0 194 SkIntToScalar(x) + SK_ScalarHalf,
michael@0 195 SkIntToScalar(y) + SK_ScalarHalf, count);
michael@0 196
michael@0 197 while ((count = iter.next()) != 0) {
michael@0 198 const SkFixed* SK_RESTRICT srcXY = iter.getXY();
michael@0 199
michael@0 200 if (count >= 8) {
michael@0 201 int32_t *mysrc = (int32_t *) srcXY;
michael@0 202 int16_t *mydst = (int16_t *) xy;
michael@0 203 do {
michael@0 204 int16x8x2_t hi16;
michael@0 205 int32x4x2_t xy1, xy2;
michael@0 206
michael@0 207 xy1 = vld2q_s32(mysrc);
michael@0 208 xy2 = vld2q_s32(mysrc+8);
michael@0 209
michael@0 210 hi16.val[0] = TILEX_PROCF_NEON8(xy1.val[0], xy2.val[0], maxX);
michael@0 211 hi16.val[1] = TILEY_PROCF_NEON8(xy1.val[1], xy2.val[1], maxY);
michael@0 212
michael@0 213 vst2q_s16(mydst, hi16);
michael@0 214
michael@0 215 count -= 8; // 8 iterations
michael@0 216 mysrc += 16; // 16 longs
michael@0 217 mydst += 16; // 16 shorts, aka 8 longs
michael@0 218 } while (count >= 8);
michael@0 219 // get xy and srcXY fixed up
michael@0 220 srcXY = (const SkFixed *) mysrc;
michael@0 221 xy = (uint32_t *) mydst;
michael@0 222 }
michael@0 223
michael@0 224 while (--count >= 0) {
michael@0 225 *xy++ = (TILEY_PROCF(srcXY[1], maxY) << 16) |
michael@0 226 TILEX_PROCF(srcXY[0], maxX);
michael@0 227 srcXY += 2;
michael@0 228 }
michael@0 229 }
michael@0 230 }
michael@0 231
michael@0 232 static inline uint32_t PACK_FILTER_Y_NAME(SkFixed f, unsigned max,
michael@0 233 SkFixed one PREAMBLE_PARAM_Y) {
michael@0 234 unsigned i = TILEY_PROCF(f, max);
michael@0 235 i = (i << 4) | TILEY_LOW_BITS(f, max);
michael@0 236 return (i << 14) | (TILEY_PROCF((f + one), max));
michael@0 237 }
michael@0 238
michael@0 239 static inline uint32_t PACK_FILTER_X_NAME(SkFixed f, unsigned max,
michael@0 240 SkFixed one PREAMBLE_PARAM_X) {
michael@0 241 unsigned i = TILEX_PROCF(f, max);
michael@0 242 i = (i << 4) | TILEX_LOW_BITS(f, max);
michael@0 243 return (i << 14) | (TILEX_PROCF((f + one), max));
michael@0 244 }
michael@0 245
michael@0 246 static inline int32x4_t PACK_FILTER_X4_NAME(int32x4_t f, unsigned max,
michael@0 247 SkFixed one PREAMBLE_PARAM_X) {
michael@0 248 int32x4_t ret, res, wide_one;
michael@0 249
michael@0 250 // Prepare constants
michael@0 251 wide_one = vdupq_n_s32(one);
michael@0 252
michael@0 253 // Step 1
michael@0 254 res = TILEX_PROCF_NEON4(f, max);
michael@0 255
michael@0 256 // Step 2
michael@0 257 ret = TILEX_LOW_BITS_NEON4(f, max);
michael@0 258 ret = vsliq_n_s32(ret, res, 4);
michael@0 259
michael@0 260 // Step 3
michael@0 261 res = TILEX_PROCF_NEON4(f + wide_one, max);
michael@0 262 ret = vorrq_s32(vshlq_n_s32(ret, 14), res);
michael@0 263
michael@0 264 return ret;
michael@0 265 }
michael@0 266
michael@0 267 static inline int32x4_t PACK_FILTER_Y4_NAME(int32x4_t f, unsigned max,
michael@0 268 SkFixed one PREAMBLE_PARAM_X) {
michael@0 269 int32x4_t ret, res, wide_one;
michael@0 270
michael@0 271 // Prepare constants
michael@0 272 wide_one = vdupq_n_s32(one);
michael@0 273
michael@0 274 // Step 1
michael@0 275 res = TILEY_PROCF_NEON4(f, max);
michael@0 276
michael@0 277 // Step 2
michael@0 278 ret = TILEY_LOW_BITS_NEON4(f, max);
michael@0 279 ret = vsliq_n_s32(ret, res, 4);
michael@0 280
michael@0 281 // Step 3
michael@0 282 res = TILEY_PROCF_NEON4(f + wide_one, max);
michael@0 283 ret = vorrq_s32(vshlq_n_s32(ret, 14), res);
michael@0 284
michael@0 285 return ret;
michael@0 286 }
michael@0 287
michael@0 288 static void SCALE_FILTER_NAME(const SkBitmapProcState& s,
michael@0 289 uint32_t xy[], int count, int x, int y) {
michael@0 290 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
michael@0 291 SkMatrix::kScale_Mask)) == 0);
michael@0 292 SkASSERT(s.fInvKy == 0);
michael@0 293
michael@0 294 PREAMBLE(s);
michael@0 295
michael@0 296 const unsigned maxX = s.fBitmap->width() - 1;
michael@0 297 const SkFixed one = s.fFilterOneX;
michael@0 298 const SkFractionalInt dx = s.fInvSxFractionalInt;
michael@0 299 SkFractionalInt fx;
michael@0 300
michael@0 301 {
michael@0 302 SkPoint pt;
michael@0 303 s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
michael@0 304 SkIntToScalar(y) + SK_ScalarHalf, &pt);
michael@0 305 const SkFixed fy = SkScalarToFixed(pt.fY) - (s.fFilterOneY >> 1);
michael@0 306 const unsigned maxY = s.fBitmap->height() - 1;
michael@0 307 // compute our two Y values up front
michael@0 308 *xy++ = PACK_FILTER_Y_NAME(fy, maxY, s.fFilterOneY PREAMBLE_ARG_Y);
michael@0 309 // now initialize fx
michael@0 310 fx = SkScalarToFractionalInt(pt.fX) - (SkFixedToFractionalInt(one) >> 1);
michael@0 311 }
michael@0 312
michael@0 313 #ifdef CHECK_FOR_DECAL
michael@0 314 // test if we don't need to apply the tile proc
michael@0 315 if (can_truncate_to_fixed_for_decal(fx, dx, count, maxX)) {
michael@0 316 decal_filter_scale_neon(xy, SkFractionalIntToFixed(fx),
michael@0 317 SkFractionalIntToFixed(dx), count);
michael@0 318 return;
michael@0 319 }
michael@0 320 #endif
michael@0 321 {
michael@0 322
michael@0 323 if (count >= 4) {
michael@0 324 int32x4_t wide_fx;
michael@0 325
michael@0 326 wide_fx = vdupq_n_s32(SkFractionalIntToFixed(fx));
michael@0 327 wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx), wide_fx, 1);
michael@0 328 wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx), wide_fx, 2);
michael@0 329 wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx+dx), wide_fx, 3);
michael@0 330
michael@0 331 while (count >= 4) {
michael@0 332 int32x4_t res;
michael@0 333
michael@0 334 res = PACK_FILTER_X4_NAME(wide_fx, maxX, one PREAMBLE_ARG_X);
michael@0 335
michael@0 336 vst1q_u32(xy, vreinterpretq_u32_s32(res));
michael@0 337
michael@0 338 wide_fx += vdupq_n_s32(SkFractionalIntToFixed(dx+dx+dx+dx));
michael@0 339 fx += dx+dx+dx+dx;
michael@0 340 xy += 4;
michael@0 341 count -= 4;
michael@0 342 }
michael@0 343 }
michael@0 344
michael@0 345 while (--count >= 0) {
michael@0 346 *xy++ = PACK_FILTER_X_NAME(SkFractionalIntToFixed(fx), maxX, one PREAMBLE_ARG_X);
michael@0 347 fx += dx;
michael@0 348 }
michael@0 349
michael@0 350 }
michael@0 351 }
michael@0 352
michael@0 353 static void AFFINE_FILTER_NAME(const SkBitmapProcState& s,
michael@0 354 uint32_t xy[], int count, int x, int y) {
michael@0 355 SkASSERT(s.fInvType & SkMatrix::kAffine_Mask);
michael@0 356 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
michael@0 357 SkMatrix::kScale_Mask |
michael@0 358 SkMatrix::kAffine_Mask)) == 0);
michael@0 359
michael@0 360 PREAMBLE(s);
michael@0 361 SkPoint srcPt;
michael@0 362 s.fInvProc(s.fInvMatrix,
michael@0 363 SkIntToScalar(x) + SK_ScalarHalf,
michael@0 364 SkIntToScalar(y) + SK_ScalarHalf, &srcPt);
michael@0 365
michael@0 366 SkFixed oneX = s.fFilterOneX;
michael@0 367 SkFixed oneY = s.fFilterOneY;
michael@0 368 SkFixed fx = SkScalarToFixed(srcPt.fX) - (oneX >> 1);
michael@0 369 SkFixed fy = SkScalarToFixed(srcPt.fY) - (oneY >> 1);
michael@0 370 SkFixed dx = s.fInvSx;
michael@0 371 SkFixed dy = s.fInvKy;
michael@0 372 unsigned maxX = s.fBitmap->width() - 1;
michael@0 373 unsigned maxY = s.fBitmap->height() - 1;
michael@0 374
michael@0 375 if (count >= 4) {
michael@0 376 int32x4_t wide_fy, wide_fx;
michael@0 377
michael@0 378 wide_fx = vdupq_n_s32(fx);
michael@0 379 wide_fx = vsetq_lane_s32(fx+dx, wide_fx, 1);
michael@0 380 wide_fx = vsetq_lane_s32(fx+dx+dx, wide_fx, 2);
michael@0 381 wide_fx = vsetq_lane_s32(fx+dx+dx+dx, wide_fx, 3);
michael@0 382
michael@0 383 wide_fy = vdupq_n_s32(fy);
michael@0 384 wide_fy = vsetq_lane_s32(fy+dy, wide_fy, 1);
michael@0 385 wide_fy = vsetq_lane_s32(fy+dy+dy, wide_fy, 2);
michael@0 386 wide_fy = vsetq_lane_s32(fy+dy+dy+dy, wide_fy, 3);
michael@0 387
michael@0 388 while (count >= 4) {
michael@0 389 int32x4x2_t vxy;
michael@0 390
michael@0 391 // do the X side, then the Y side, then interleave them
michael@0 392 vxy.val[0] = PACK_FILTER_Y4_NAME(wide_fy, maxY, oneY PREAMBLE_ARG_Y);
michael@0 393 vxy.val[1] = PACK_FILTER_X4_NAME(wide_fx, maxX, oneX PREAMBLE_ARG_X);
michael@0 394
michael@0 395 // interleave as YXYXYXYX as part of the storing
michael@0 396 vst2q_s32((int32_t*)xy, vxy);
michael@0 397
michael@0 398 // prepare next iteration
michael@0 399 wide_fx += vdupq_n_s32(dx+dx+dx+dx);
michael@0 400 fx += dx + dx + dx + dx;
michael@0 401 wide_fy += vdupq_n_s32(dy+dy+dy+dy);
michael@0 402 fy += dy+dy+dy+dy;
michael@0 403 xy += 8; // 4 x's, 4 y's
michael@0 404 count -= 4;
michael@0 405 }
michael@0 406 }
michael@0 407
michael@0 408 while (--count >= 0) {
michael@0 409 // NB: writing Y/X
michael@0 410 *xy++ = PACK_FILTER_Y_NAME(fy, maxY, oneY PREAMBLE_ARG_Y);
michael@0 411 fy += dy;
michael@0 412 *xy++ = PACK_FILTER_X_NAME(fx, maxX, oneX PREAMBLE_ARG_X);
michael@0 413 fx += dx;
michael@0 414 }
michael@0 415 }
michael@0 416
michael@0 417 static void PERSP_FILTER_NAME(const SkBitmapProcState& s,
michael@0 418 uint32_t* SK_RESTRICT xy, int count,
michael@0 419 int x, int y) {
michael@0 420 SkASSERT(s.fInvType & SkMatrix::kPerspective_Mask);
michael@0 421
michael@0 422 PREAMBLE(s);
michael@0 423 unsigned maxX = s.fBitmap->width() - 1;
michael@0 424 unsigned maxY = s.fBitmap->height() - 1;
michael@0 425 SkFixed oneX = s.fFilterOneX;
michael@0 426 SkFixed oneY = s.fFilterOneY;
michael@0 427
michael@0 428 SkPerspIter iter(s.fInvMatrix,
michael@0 429 SkIntToScalar(x) + SK_ScalarHalf,
michael@0 430 SkIntToScalar(y) + SK_ScalarHalf, count);
michael@0 431
michael@0 432 while ((count = iter.next()) != 0) {
michael@0 433 const SkFixed* SK_RESTRICT srcXY = iter.getXY();
michael@0 434
michael@0 435 while (count >= 4) {
michael@0 436 int32x4_t wide_x, wide_y;
michael@0 437 int32x4x2_t vxy, vresyx;
michael@0 438
michael@0 439 // load src: x-y-x-y-x-y-x-y
michael@0 440 vxy = vld2q_s32(srcXY);
michael@0 441
michael@0 442 // do the X side, then the Y side, then interleave them
michael@0 443 wide_x = vsubq_s32(vxy.val[0], vdupq_n_s32(oneX>>1));
michael@0 444 wide_y = vsubq_s32(vxy.val[1], vdupq_n_s32(oneY>>1));
michael@0 445
michael@0 446 vresyx.val[0] = PACK_FILTER_Y4_NAME(wide_y, maxY, oneY PREAMBLE_ARG_Y);
michael@0 447 vresyx.val[1] = PACK_FILTER_X4_NAME(wide_x, maxX, oneX PREAMBLE_ARG_X);
michael@0 448
michael@0 449 // store interleaved as y-x-y-x-y-x-y-x (NB != read order)
michael@0 450 vst2q_s32((int32_t*)xy, vresyx);
michael@0 451
michael@0 452 // on to the next iteration
michael@0 453 srcXY += 2*4;
michael@0 454 count -= 4;
michael@0 455 xy += 2*4;
michael@0 456 }
michael@0 457
michael@0 458 while (--count >= 0) {
michael@0 459 // NB: we read x/y, we write y/x
michael@0 460 *xy++ = PACK_FILTER_Y_NAME(srcXY[1] - (oneY >> 1), maxY,
michael@0 461 oneY PREAMBLE_ARG_Y);
michael@0 462 *xy++ = PACK_FILTER_X_NAME(srcXY[0] - (oneX >> 1), maxX,
michael@0 463 oneX PREAMBLE_ARG_X);
michael@0 464 srcXY += 2;
michael@0 465 }
michael@0 466 }
michael@0 467 }
michael@0 468
// Table of the six matrix procs defined in this file, ordered scale, affine,
// perspective, each as a (nofilter, filter) pair.
// NOTE(review): presumably indexed by code outside this file, so the order
// matters; confirm against the user of this table before reordering.
michael@0 469 const SkBitmapProcState::MatrixProc MAKENAME(_Procs)[] = {
michael@0 470 SCALE_NOFILTER_NAME,
michael@0 471 SCALE_FILTER_NAME,
michael@0 472 AFFINE_NOFILTER_NAME,
michael@0 473 AFFINE_FILTER_NAME,
michael@0 474 PERSP_NOFILTER_NAME,
michael@0 475 PERSP_FILTER_NAME
michael@0 476 };
michael@0 477
// Teardown: undefine the macros this file consumed or defined.
// NOTE(review): presumably this lets the file be included again with a
// different set of definitions; confirm against the including file.

// NEON tile macros used by the procs above (not defined in this file).
michael@0 478 #undef TILEX_PROCF_NEON8
michael@0 479 #undef TILEY_PROCF_NEON8
michael@0 480 #undef TILEX_PROCF_NEON4
michael@0 481 #undef TILEY_PROCF_NEON4
michael@0 482 #undef TILEX_LOW_BITS_NEON4
michael@0 483 #undef TILEY_LOW_BITS_NEON4
michael@0 484
// Name generator and scalar tile macros (not defined in this file).
michael@0 485 #undef MAKENAME
michael@0 486 #undef TILEX_PROCF
michael@0 487 #undef TILEY_PROCF
// CHECK_FOR_DECAL is optional, so it is only undefined when present.
michael@0 488 #ifdef CHECK_FOR_DECAL
michael@0 489 #undef CHECK_FOR_DECAL
michael@0 490 #endif
michael@0 491
// Proc names defined at the top of this file.
// NOTE(review): the PACK_FILTER_X_NAME, PACK_FILTER_Y_NAME,
// PACK_FILTER_X4_NAME and PACK_FILTER_Y4_NAME macros defined alongside them
// are not undefined here; confirm whether that is intentional.
michael@0 492 #undef SCALE_NOFILTER_NAME
michael@0 493 #undef SCALE_FILTER_NAME
michael@0 494 #undef AFFINE_NOFILTER_NAME
michael@0 495 #undef AFFINE_FILTER_NAME
michael@0 496 #undef PERSP_NOFILTER_NAME
michael@0 497 #undef PERSP_FILTER_NAME
michael@0 498
// PREAMBLE hooks, whether defaulted in this file or defined beforehand.
michael@0 499 #undef PREAMBLE
michael@0 500 #undef PREAMBLE_PARAM_X
michael@0 501 #undef PREAMBLE_PARAM_Y
michael@0 502 #undef PREAMBLE_ARG_X
michael@0 503 #undef PREAMBLE_ARG_Y
michael@0 504
// Scalar low-bits macros used by the pack helpers (not defined in this file).
michael@0 505 #undef TILEX_LOW_BITS
michael@0 506 #undef TILEY_LOW_BITS

mercurial