gfx/skia/trunk/src/opts/SkBlurImage_opts_SSE2.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/skia/trunk/src/opts/SkBlurImage_opts_SSE2.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,117 @@
     1.4 +/*
     1.5 + * Copyright 2013 The Android Open Source Project
     1.6 + *
     1.7 + * Use of this source code is governed by a BSD-style license that can be
     1.8 + * found in the LICENSE file.
     1.9 + */
    1.10 +
    1.11 +
    1.12 +#include "SkBitmap.h"
    1.13 +#include "SkColorPriv.h"
    1.14 +#include "SkBlurImage_opts_SSE2.h"
    1.15 +#include "SkRect.h"
    1.16 +
    1.17 +#include <emmintrin.h>
    1.18 +
    1.19 +namespace {
    1.20 +
    // Axis selector passed as the template arguments of SkBoxBlur_SSE2: the first
    // argument picks how the source is walked, the second how the destination is
    // written.
    enum BlurDirection {
        kX,
        kY
    };
    1.24 +
    /**
     * Zero-extends each of the four bytes of a 32-bit value into its own 32-bit
     * lane of an SSE register: the least significant byte ends up in lane 0 and
     * the most significant byte in lane 3, with the upper 24 bits of every lane
     * cleared.
     */
    inline __m128i expand(int a) {
        const __m128i zeroes = _mm_setzero_si128();

        // Move the value into the low dword, then interleave with zero bytes:
        // 0 0 0 0   0 0 0 0   0 A 0 R   0 G 0 B
        const __m128i asWords = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a), zeroes);

        // Interleave once more, this time with zero words:
        // 0 0 0 A   0 0 0 R   0 0 0 G   0 0 0 B
        return _mm_unpacklo_epi16(asWords, zeroes);
    }
    1.42 +
    1.43 +template<BlurDirection srcDirection, BlurDirection dstDirection>
    1.44 +void SkBoxBlur_SSE2(const SkPMColor* src, int srcStride, SkPMColor* dst, int kernelSize,
    1.45 +                    int leftOffset, int rightOffset, int width, int height)
    1.46 +{
    1.47 +    const int rightBorder = SkMin32(rightOffset + 1, width);
    1.48 +    const int srcStrideX = srcDirection == kX ? 1 : srcStride;
    1.49 +    const int dstStrideX = dstDirection == kX ? 1 : height;
    1.50 +    const int srcStrideY = srcDirection == kX ? srcStride : 1;
    1.51 +    const int dstStrideY = dstDirection == kX ? width : 1;
    1.52 +    const __m128i scale = _mm_set1_epi32((1 << 24) / kernelSize);
    1.53 +    const __m128i half = _mm_set1_epi32(1 << 23);
    1.54 +    const __m128i zero = _mm_setzero_si128();
    1.55 +    for (int y = 0; y < height; ++y) {
    1.56 +        __m128i sum = zero;
    1.57 +        const SkPMColor* p = src;
    1.58 +        for (int i = 0; i < rightBorder; ++i) {
    1.59 +            sum = _mm_add_epi32(sum, expand(*p));
    1.60 +            p += srcStrideX;
    1.61 +        }
    1.62 +
    1.63 +        const SkPMColor* sptr = src;
    1.64 +        SkColor* dptr = dst;
    1.65 +        for (int x = 0; x < width; ++x) {
    1.66 +#if 0
    1.67 +            // In SSE4.1, this would be
    1.68 +            __m128i result = _mm_mullo_epi32(sum, scale);
    1.69 +#else
    1.70 +            // But SSE2 has no PMULLUD, so we must do AG and RB separately.
    1.71 +            __m128i tmp1 = _mm_mul_epu32(sum, scale);
    1.72 +            __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(sum, 4),
    1.73 +                                         _mm_srli_si128(scale, 4));
    1.74 +            __m128i result = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0,0,2,0)),
    1.75 +                                                _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0,0,2,0)));
    1.76 +#endif
    1.77 +            // sumA*scale+.5 sumB*scale+.5 sumG*scale+.5 sumB*scale+.5
    1.78 +            result = _mm_add_epi32(result, half);
    1.79 +
    1.80 +            // 0 0 0 A   0 0 0 R   0 0 0 G   0 0 0 B
    1.81 +            result = _mm_srli_epi32(result, 24);
    1.82 +
    1.83 +            // 0 0 0 0   0 0 0 0   0 A 0 R   0 G 0 B
    1.84 +            result = _mm_packs_epi32(result, zero);
    1.85 +
    1.86 +            // 0 0 0 0   0 0 0 0   0 0 0 0   A R G B
    1.87 +            result = _mm_packus_epi16(result, zero);
    1.88 +            *dptr = _mm_cvtsi128_si32(result);
    1.89 +            if (x >= leftOffset) {
    1.90 +                SkColor l = *(sptr - leftOffset * srcStrideX);
    1.91 +                sum = _mm_sub_epi32(sum, expand(l));
    1.92 +            }
    1.93 +            if (x + rightOffset + 1 < width) {
    1.94 +                SkColor r = *(sptr + (rightOffset + 1) * srcStrideX);
    1.95 +                sum = _mm_add_epi32(sum, expand(r));
    1.96 +            }
    1.97 +            sptr += srcStrideX;
    1.98 +            if (srcDirection == kY) {
    1.99 +                _mm_prefetch(reinterpret_cast<const char*>(sptr + (rightOffset + 1) * srcStrideX),
   1.100 +                             _MM_HINT_T0);
   1.101 +            }
   1.102 +            dptr += dstStrideX;
   1.103 +        }
   1.104 +        src += srcStrideY;
   1.105 +        dst += dstStrideY;
   1.106 +    }
   1.107 +}
   1.108 +
   1.109 +} // namespace
   1.110 +
   1.111 +bool SkBoxBlurGetPlatformProcs_SSE2(SkBoxBlurProc* boxBlurX,
   1.112 +                                    SkBoxBlurProc* boxBlurY,
   1.113 +                                    SkBoxBlurProc* boxBlurXY,
   1.114 +                                    SkBoxBlurProc* boxBlurYX) {
   1.115 +    *boxBlurX = SkBoxBlur_SSE2<kX, kX>;
   1.116 +    *boxBlurY = SkBoxBlur_SSE2<kY, kY>;
   1.117 +    *boxBlurXY = SkBoxBlur_SSE2<kX, kY>;
   1.118 +    *boxBlurYX = SkBoxBlur_SSE2<kY, kX>;
   1.119 +    return true;
   1.120 +}

mercurial