gfx/skia/trunk/src/opts/SkBitmapProcState_matrix_neon.h

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double-key logic depending on private browsing mode
or the privacy.thirdparty.isolate preference, and implement it in
GetCookieStringCommon and FindCookie, where it counts.
Some reservations remain about how to convince FindCookie callers to test
the condition and pass a nullptr when double-key logic is disabled.

     2 #include <arm_neon.h>
     // Names of the six matrix procs and four packing helpers defined in this
     // file. Each is built with MAKENAME, which the including file must define
     // before including this header (it is #undef'd again at the end).
     5 #define SCALE_NOFILTER_NAME     MAKENAME(_nofilter_scale)
     6 #define SCALE_FILTER_NAME       MAKENAME(_filter_scale)
     7 #define AFFINE_NOFILTER_NAME    MAKENAME(_nofilter_affine)
     8 #define AFFINE_FILTER_NAME      MAKENAME(_filter_affine)
     9 #define PERSP_NOFILTER_NAME     MAKENAME(_nofilter_persp)
    10 #define PERSP_FILTER_NAME       MAKENAME(_filter_persp)
    12 #define PACK_FILTER_X_NAME  MAKENAME(_pack_filter_x)
    13 #define PACK_FILTER_Y_NAME  MAKENAME(_pack_filter_y)
    14 #define PACK_FILTER_X4_NAME MAKENAME(_pack_filter_x4)
    15 #define PACK_FILTER_Y4_NAME MAKENAME(_pack_filter_y4)
    // Default (empty) preamble hooks, used when the including file does not
    // supply its own. PREAMBLE(state) is expanded at the top of every matrix
    // proc; the PARAM/ARG macros are appended to the parameter and argument
    // lists of the PACK_FILTER_* helpers.
    17 #ifndef PREAMBLE
    18     #define PREAMBLE(state)
    19     #define PREAMBLE_PARAM_X
    20     #define PREAMBLE_PARAM_Y
    21     #define PREAMBLE_ARG_X
    22     #define PREAMBLE_ARG_Y
    23 #endif
    25 static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s,
    26                                 uint32_t xy[], int count, int x, int y) {
    27     SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
    28                              SkMatrix::kScale_Mask)) == 0);
    30     PREAMBLE(s);
    32     // we store y, x, x, x, x, x
    33     const unsigned maxX = s.fBitmap->width() - 1;
    34     SkFractionalInt fx;
    35     {
    36         SkPoint pt;
    37         s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
    38                                  SkIntToScalar(y) + SK_ScalarHalf, &pt);
    39         fx = SkScalarToFractionalInt(pt.fY);
    40         const unsigned maxY = s.fBitmap->height() - 1;
    41         *xy++ = TILEY_PROCF(SkFractionalIntToFixed(fx), maxY);
    42         fx = SkScalarToFractionalInt(pt.fX);
    43     }
    45     if (0 == maxX) {
    46         // all of the following X values must be 0
    47         memset(xy, 0, count * sizeof(uint16_t));
    48         return;
    49     }
    51     const SkFractionalInt dx = s.fInvSxFractionalInt;
    53 #ifdef CHECK_FOR_DECAL
    54     // test if we don't need to apply the tile proc
    55     if (can_truncate_to_fixed_for_decal(fx, dx, count, maxX)) {
    56         decal_nofilter_scale_neon(xy, SkFractionalIntToFixed(fx),
    57                              SkFractionalIntToFixed(dx), count);
    58         return;
    59     }
    60 #endif
    62     if (count >= 8) {
    63         SkFractionalInt dx2 = dx+dx;
    64         SkFractionalInt dx4 = dx2+dx2;
    65         SkFractionalInt dx8 = dx4+dx4;
    67         // now build fx/fx+dx/fx+2dx/fx+3dx
    68         SkFractionalInt fx1, fx2, fx3;
    69         int32x4_t lbase, hbase;
    70         int16_t *dst16 = (int16_t *)xy;
    72         fx1 = fx+dx;
    73         fx2 = fx1+dx;
    74         fx3 = fx2+dx;
    76         lbase = vdupq_n_s32(SkFractionalIntToFixed(fx));
    77         lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx1), lbase, 1);
    78         lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx2), lbase, 2);
    79         lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx3), lbase, 3);
    80         hbase = vaddq_s32(lbase, vdupq_n_s32(SkFractionalIntToFixed(dx4)));
    82         // store & bump
    83         while (count >= 8) {
    85             int16x8_t fx8;
    87             fx8 = TILEX_PROCF_NEON8(lbase, hbase, maxX);
    89             vst1q_s16(dst16, fx8);
    91             // but preserving base & on to the next
    92             lbase = vaddq_s32 (lbase, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
    93             hbase = vaddq_s32 (hbase, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
    94             dst16 += 8;
    95             count -= 8;
    96             fx += dx8;
    97         };
    98         xy = (uint32_t *) dst16;
    99     }
   101     uint16_t* xx = (uint16_t*)xy;
   102     for (int i = count; i > 0; --i) {
   103         *xx++ = TILEX_PROCF(SkFractionalIntToFixed(fx), maxX);
   104         fx += dx;
   105     }
   106 }
   108 static void AFFINE_NOFILTER_NAME(const SkBitmapProcState& s,
   109                                  uint32_t xy[], int count, int x, int y) {
   110     SkASSERT(s.fInvType & SkMatrix::kAffine_Mask);
   111     SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
   112                              SkMatrix::kScale_Mask |
   113                              SkMatrix::kAffine_Mask)) == 0);
   115     PREAMBLE(s);
   116     SkPoint srcPt;
   117     s.fInvProc(s.fInvMatrix,
   118                SkIntToScalar(x) + SK_ScalarHalf,
   119                SkIntToScalar(y) + SK_ScalarHalf, &srcPt);
   121     SkFractionalInt fx = SkScalarToFractionalInt(srcPt.fX);
   122     SkFractionalInt fy = SkScalarToFractionalInt(srcPt.fY);
   123     SkFractionalInt dx = s.fInvSxFractionalInt;
   124     SkFractionalInt dy = s.fInvKyFractionalInt;
   125     int maxX = s.fBitmap->width() - 1;
   126     int maxY = s.fBitmap->height() - 1;
   128     if (count >= 8) {
   129         SkFractionalInt dx4 = dx * 4;
   130         SkFractionalInt dy4 = dy * 4;
   131         SkFractionalInt dx8 = dx * 8;
   132         SkFractionalInt dy8 = dy * 8;
   134         int32x4_t xbase, ybase;
   135         int32x4_t x2base, y2base;
   136         int16_t *dst16 = (int16_t *) xy;
   138         // now build fx, fx+dx, fx+2dx, fx+3dx
   139         xbase = vdupq_n_s32(SkFractionalIntToFixed(fx));
   140         xbase = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx), xbase, 1);
   141         xbase = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx), xbase, 2);
   142         xbase = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx+dx), xbase, 3);
   144         // same for fy
   145         ybase = vdupq_n_s32(SkFractionalIntToFixed(fy));
   146         ybase = vsetq_lane_s32(SkFractionalIntToFixed(fy+dy), ybase, 1);
   147         ybase = vsetq_lane_s32(SkFractionalIntToFixed(fy+dy+dy), ybase, 2);
   148         ybase = vsetq_lane_s32(SkFractionalIntToFixed(fy+dy+dy+dy), ybase, 3);
   150         x2base = vaddq_s32(xbase, vdupq_n_s32(SkFractionalIntToFixed(dx4)));
   151         y2base = vaddq_s32(ybase, vdupq_n_s32(SkFractionalIntToFixed(dy4)));
   153         // store & bump
   154         do {
   155             int16x8x2_t hi16;
   157             hi16.val[0] = TILEX_PROCF_NEON8(xbase, x2base, maxX);
   158             hi16.val[1] = TILEY_PROCF_NEON8(ybase, y2base, maxY);
   160             vst2q_s16(dst16, hi16);
   162             // moving base and on to the next
   163             xbase = vaddq_s32(xbase, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
   164             ybase = vaddq_s32(ybase, vdupq_n_s32(SkFractionalIntToFixed(dy8)));
   165             x2base = vaddq_s32(x2base, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
   166             y2base = vaddq_s32(y2base, vdupq_n_s32(SkFractionalIntToFixed(dy8)));
   168             dst16 += 16; // 8x32 aka 16x16
   169             count -= 8;
   170             fx += dx8;
   171             fy += dy8;
   172         } while (count >= 8);
   173         xy = (uint32_t *) dst16;
   174     }
   176     for (int i = count; i > 0; --i) {
   177         *xy++ = (TILEY_PROCF(SkFractionalIntToFixed(fy), maxY) << 16) |
   178                  TILEX_PROCF(SkFractionalIntToFixed(fx), maxX);
   179         fx += dx; fy += dy;
   180     }
   181 }
   183 static void PERSP_NOFILTER_NAME(const SkBitmapProcState& s,
   184                                 uint32_t* SK_RESTRICT xy,
   185                                 int count, int x, int y) {
   186     SkASSERT(s.fInvType & SkMatrix::kPerspective_Mask);
   188     PREAMBLE(s);
   189     // max{X,Y} are int here, but later shown/assumed to fit in 16 bits
   190     int maxX = s.fBitmap->width() - 1;
   191     int maxY = s.fBitmap->height() - 1;
   193     SkPerspIter iter(s.fInvMatrix,
   194                      SkIntToScalar(x) + SK_ScalarHalf,
   195                      SkIntToScalar(y) + SK_ScalarHalf, count);
   197     while ((count = iter.next()) != 0) {
   198         const SkFixed* SK_RESTRICT srcXY = iter.getXY();
   200         if (count >= 8) {
   201             int32_t *mysrc = (int32_t *) srcXY;
   202             int16_t *mydst = (int16_t *) xy;
   203             do {
   204                 int16x8x2_t hi16;
   205                 int32x4x2_t xy1, xy2;
   207                 xy1 = vld2q_s32(mysrc);
   208                 xy2 = vld2q_s32(mysrc+8);
   210                 hi16.val[0] = TILEX_PROCF_NEON8(xy1.val[0], xy2.val[0], maxX);
   211                 hi16.val[1] = TILEY_PROCF_NEON8(xy1.val[1], xy2.val[1], maxY);
   213                 vst2q_s16(mydst, hi16);
   215                 count -= 8;  // 8 iterations
   216                 mysrc += 16; // 16 longs
   217                 mydst += 16; // 16 shorts, aka 8 longs
   218             } while (count >= 8);
   219             // get xy and srcXY fixed up
   220             srcXY = (const SkFixed *) mysrc;
   221             xy = (uint32_t *) mydst;
   222         }
   224         while (--count >= 0) {
   225             *xy++ = (TILEY_PROCF(srcXY[1], maxY) << 16) |
   226                      TILEX_PROCF(srcXY[0], maxX);
   227             srcXY += 2;
   228         }
   229     }
   230 }
   232 static inline uint32_t PACK_FILTER_Y_NAME(SkFixed f, unsigned max,
   233                                           SkFixed one PREAMBLE_PARAM_Y) {
   234     unsigned i = TILEY_PROCF(f, max);
   235     i = (i << 4) | TILEY_LOW_BITS(f, max);
   236     return (i << 14) | (TILEY_PROCF((f + one), max));
   237 }
   239 static inline uint32_t PACK_FILTER_X_NAME(SkFixed f, unsigned max,
   240                                           SkFixed one PREAMBLE_PARAM_X) {
   241     unsigned i = TILEX_PROCF(f, max);
   242     i = (i << 4) | TILEX_LOW_BITS(f, max);
   243     return (i << 14) | (TILEX_PROCF((f + one), max));
   244 }
   246 static inline int32x4_t PACK_FILTER_X4_NAME(int32x4_t f, unsigned max,
   247                                           SkFixed one PREAMBLE_PARAM_X) {
   248     int32x4_t ret, res, wide_one;
   250     // Prepare constants
   251     wide_one = vdupq_n_s32(one);
   253     // Step 1
   254     res = TILEX_PROCF_NEON4(f, max);
   256     // Step 2
   257     ret = TILEX_LOW_BITS_NEON4(f, max);
   258     ret = vsliq_n_s32(ret, res, 4);
   260     // Step 3
   261     res = TILEX_PROCF_NEON4(f + wide_one, max);
   262     ret = vorrq_s32(vshlq_n_s32(ret, 14), res);
   264     return ret;
   265 }
   267 static inline int32x4_t PACK_FILTER_Y4_NAME(int32x4_t f, unsigned max,
   268                                           SkFixed one PREAMBLE_PARAM_X) {
   269     int32x4_t ret, res, wide_one;
   271     // Prepare constants
   272     wide_one = vdupq_n_s32(one);
   274     // Step 1
   275     res = TILEY_PROCF_NEON4(f, max);
   277     // Step 2
   278     ret = TILEY_LOW_BITS_NEON4(f, max);
   279     ret = vsliq_n_s32(ret, res, 4);
   281     // Step 3
   282     res = TILEY_PROCF_NEON4(f + wide_one, max);
   283     ret = vorrq_s32(vshlq_n_s32(ret, 14), res);
   285     return ret;
   286 }
   288 static void SCALE_FILTER_NAME(const SkBitmapProcState& s,
   289                               uint32_t xy[], int count, int x, int y) {
   290     SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
   291                              SkMatrix::kScale_Mask)) == 0);
   292     SkASSERT(s.fInvKy == 0);
   294     PREAMBLE(s);
   296     const unsigned maxX = s.fBitmap->width() - 1;
   297     const SkFixed one = s.fFilterOneX;
   298     const SkFractionalInt dx = s.fInvSxFractionalInt;
   299     SkFractionalInt fx;
   301     {
   302         SkPoint pt;
   303         s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
   304                                  SkIntToScalar(y) + SK_ScalarHalf, &pt);
   305         const SkFixed fy = SkScalarToFixed(pt.fY) - (s.fFilterOneY >> 1);
   306         const unsigned maxY = s.fBitmap->height() - 1;
   307         // compute our two Y values up front
   308         *xy++ = PACK_FILTER_Y_NAME(fy, maxY, s.fFilterOneY PREAMBLE_ARG_Y);
   309         // now initialize fx
   310         fx = SkScalarToFractionalInt(pt.fX) - (SkFixedToFractionalInt(one) >> 1);
   311     }
   313 #ifdef CHECK_FOR_DECAL
   314     // test if we don't need to apply the tile proc
   315     if (can_truncate_to_fixed_for_decal(fx, dx, count, maxX)) {
   316         decal_filter_scale_neon(xy, SkFractionalIntToFixed(fx),
   317                              SkFractionalIntToFixed(dx), count);
   318         return;
   319     }
   320 #endif
   321     {
   323     if (count >= 4) {
   324         int32x4_t wide_fx;
   326         wide_fx = vdupq_n_s32(SkFractionalIntToFixed(fx));
   327         wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx), wide_fx, 1);
   328         wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx), wide_fx, 2);
   329         wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx+dx), wide_fx, 3);
   331         while (count >= 4) {
   332             int32x4_t res;
   334             res = PACK_FILTER_X4_NAME(wide_fx, maxX, one PREAMBLE_ARG_X);
   336             vst1q_u32(xy, vreinterpretq_u32_s32(res));
   338             wide_fx += vdupq_n_s32(SkFractionalIntToFixed(dx+dx+dx+dx));
   339             fx += dx+dx+dx+dx;
   340             xy += 4;
   341             count -= 4;
   342         }
   343     }
   345     while (--count >= 0) {
   346         *xy++ = PACK_FILTER_X_NAME(SkFractionalIntToFixed(fx), maxX, one PREAMBLE_ARG_X);
   347         fx += dx;
   348     }
   350     }
   351 }
   353 static void AFFINE_FILTER_NAME(const SkBitmapProcState& s,
   354                                uint32_t xy[], int count, int x, int y) {
   355     SkASSERT(s.fInvType & SkMatrix::kAffine_Mask);
   356     SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
   357                              SkMatrix::kScale_Mask |
   358                              SkMatrix::kAffine_Mask)) == 0);
   360     PREAMBLE(s);
   361     SkPoint srcPt;
   362     s.fInvProc(s.fInvMatrix,
   363                SkIntToScalar(x) + SK_ScalarHalf,
   364                SkIntToScalar(y) + SK_ScalarHalf, &srcPt);
   366     SkFixed oneX = s.fFilterOneX;
   367     SkFixed oneY = s.fFilterOneY;
   368     SkFixed fx = SkScalarToFixed(srcPt.fX) - (oneX >> 1);
   369     SkFixed fy = SkScalarToFixed(srcPt.fY) - (oneY >> 1);
   370     SkFixed dx = s.fInvSx;
   371     SkFixed dy = s.fInvKy;
   372     unsigned maxX = s.fBitmap->width() - 1;
   373     unsigned maxY = s.fBitmap->height() - 1;
   375     if (count >= 4) {
   376         int32x4_t wide_fy, wide_fx;
   378         wide_fx = vdupq_n_s32(fx);
   379         wide_fx = vsetq_lane_s32(fx+dx, wide_fx, 1);
   380         wide_fx = vsetq_lane_s32(fx+dx+dx, wide_fx, 2);
   381         wide_fx = vsetq_lane_s32(fx+dx+dx+dx, wide_fx, 3);
   383         wide_fy = vdupq_n_s32(fy);
   384         wide_fy = vsetq_lane_s32(fy+dy, wide_fy, 1);
   385         wide_fy = vsetq_lane_s32(fy+dy+dy, wide_fy, 2);
   386         wide_fy = vsetq_lane_s32(fy+dy+dy+dy, wide_fy, 3);
   388         while (count >= 4) {
   389             int32x4x2_t vxy;
   391             // do the X side, then the Y side, then interleave them
   392             vxy.val[0] = PACK_FILTER_Y4_NAME(wide_fy, maxY, oneY PREAMBLE_ARG_Y);
   393             vxy.val[1] = PACK_FILTER_X4_NAME(wide_fx, maxX, oneX PREAMBLE_ARG_X);
   395             // interleave as YXYXYXYX as part of the storing
   396             vst2q_s32((int32_t*)xy, vxy);
   398             // prepare next iteration
   399             wide_fx += vdupq_n_s32(dx+dx+dx+dx);
   400             fx += dx + dx + dx + dx;
   401             wide_fy += vdupq_n_s32(dy+dy+dy+dy);
   402             fy += dy+dy+dy+dy;
   403             xy += 8; // 4 x's, 4 y's
   404             count -= 4;
   405         }
   406     }
   408     while (--count >= 0) {
   409         // NB: writing Y/X
   410         *xy++ = PACK_FILTER_Y_NAME(fy, maxY, oneY PREAMBLE_ARG_Y);
   411         fy += dy;
   412         *xy++ = PACK_FILTER_X_NAME(fx, maxX, oneX PREAMBLE_ARG_X);
   413         fx += dx;
   414     }
   415 }
   417 static void PERSP_FILTER_NAME(const SkBitmapProcState& s,
   418                               uint32_t* SK_RESTRICT xy, int count,
   419                               int x, int y) {
   420     SkASSERT(s.fInvType & SkMatrix::kPerspective_Mask);
   422     PREAMBLE(s);
   423     unsigned maxX = s.fBitmap->width() - 1;
   424     unsigned maxY = s.fBitmap->height() - 1;
   425     SkFixed oneX = s.fFilterOneX;
   426     SkFixed oneY = s.fFilterOneY;
   428     SkPerspIter iter(s.fInvMatrix,
   429                      SkIntToScalar(x) + SK_ScalarHalf,
   430                      SkIntToScalar(y) + SK_ScalarHalf, count);
   432     while ((count = iter.next()) != 0) {
   433         const SkFixed* SK_RESTRICT srcXY = iter.getXY();
   435         while (count >= 4) {
   436             int32x4_t wide_x, wide_y;
   437             int32x4x2_t vxy, vresyx;
   439             // load src:  x-y-x-y-x-y-x-y
   440             vxy = vld2q_s32(srcXY);
   442             // do the X side, then the Y side, then interleave them
   443             wide_x = vsubq_s32(vxy.val[0], vdupq_n_s32(oneX>>1));
   444             wide_y = vsubq_s32(vxy.val[1], vdupq_n_s32(oneY>>1));
   446             vresyx.val[0] = PACK_FILTER_Y4_NAME(wide_y, maxY, oneY PREAMBLE_ARG_Y);
   447             vresyx.val[1] = PACK_FILTER_X4_NAME(wide_x, maxX, oneX PREAMBLE_ARG_X);
   449             // store interleaved as y-x-y-x-y-x-y-x (NB != read order)
   450             vst2q_s32((int32_t*)xy, vresyx);
   452             // on to the next iteration
   453             srcXY += 2*4;
   454             count -= 4;
   455             xy += 2*4;
   456         }
   458         while (--count >= 0) {
   459             // NB: we read x/y, we write y/x
   460             *xy++ = PACK_FILTER_Y_NAME(srcXY[1] - (oneY >> 1), maxY,
   461                                        oneY PREAMBLE_ARG_Y);
   462             *xy++ = PACK_FILTER_X_NAME(srcXY[0] - (oneX >> 1), maxX,
   463                                        oneX PREAMBLE_ARG_X);
   464             srcXY += 2;
   465         }
   466     }
   467 }
   // Table of the six matrix procs defined in this file, in the order
   // scale, affine, perspective, with the no-filter variant before the filter
   // variant in each pair.
   // NOTE(review): presumably indexed by the includer using that
   // (matrix type, filter) ordering - confirm at the use site.
   469 const SkBitmapProcState::MatrixProc MAKENAME(_Procs)[] = {
   470     SCALE_NOFILTER_NAME,
   471     SCALE_FILTER_NAME,
   472     AFFINE_NOFILTER_NAME,
   473     AFFINE_FILTER_NAME,
   474     PERSP_NOFILTER_NAME,
   475     PERSP_FILTER_NAME
   476 };
   // Undefine every configuration macro this header consumed or defined, so
   // the header can be included again with a different set of definitions.
   478 #undef TILEX_PROCF_NEON8
   479 #undef TILEY_PROCF_NEON8
   480 #undef TILEX_PROCF_NEON4
   481 #undef TILEY_PROCF_NEON4
   482 #undef TILEX_LOW_BITS_NEON4
   483 #undef TILEY_LOW_BITS_NEON4
   485 #undef MAKENAME
   486 #undef TILEX_PROCF
   487 #undef TILEY_PROCF
   // CHECK_FOR_DECAL is optional, hence the guard.
   488 #ifdef CHECK_FOR_DECAL
   489     #undef CHECK_FOR_DECAL
   490 #endif
   492 #undef SCALE_NOFILTER_NAME
   493 #undef SCALE_FILTER_NAME
   494 #undef AFFINE_NOFILTER_NAME
   495 #undef AFFINE_FILTER_NAME
   496 #undef PERSP_NOFILTER_NAME
   497 #undef PERSP_FILTER_NAME
   499 #undef PREAMBLE
   500 #undef PREAMBLE_PARAM_X
   501 #undef PREAMBLE_PARAM_Y
   502 #undef PREAMBLE_ARG_X
   503 #undef PREAMBLE_ARG_Y
   505 #undef TILEX_LOW_BITS
   506 #undef TILEY_LOW_BITS

mercurial