gfx/skia/trunk/src/opts/SkBlurImage_opts_SSE2.cpp

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double key logic according to:
private browsing mode or privacy.thirdparty.isolate preference and
implement in GetCookieStringCommon and FindCookie where it counts...
With some reservations of how to convince FindCookie users to test
condition and pass a nullptr when disabling double key logic.

michael@0 1 /*
michael@0 2 * Copyright 2013 The Android Open Source Project
michael@0 3 *
michael@0 4 * Use of this source code is governed by a BSD-style license that can be
michael@0 5 * found in the LICENSE file.
michael@0 6 */
michael@0 7
michael@0 8
michael@0 9 #include "SkBitmap.h"
michael@0 10 #include "SkColorPriv.h"
michael@0 11 #include "SkBlurImage_opts_SSE2.h"
michael@0 12 #include "SkRect.h"
michael@0 13
michael@0 14 #include <emmintrin.h>
michael@0 15
michael@0 16 namespace {
michael@0 17
// Selects how a blur pass steps through a pixel buffer. The box-blur template
// below takes one value for its source and one for its destination.
enum BlurDirection {
    kX,  // successive pixels of a line are adjacent in memory
    kY   // successive pixels of a line are a full stride apart
};
michael@0 21
/**
 * Widens a packed 32-bit value so that each of its four bytes ends up in the
 * low 8 bits of its own 32-bit lane of an SSE register; the upper 24 bits of
 * every lane are zero. The lowest byte of `a` lands in lane 0.
 *
 * The byte diagrams below list the 16 register bytes from most to least
 * significant, labelling the input bytes A R G B (highest byte first).
 */
inline __m128i expand(int a) {
    const __m128i zeros = _mm_setzero_si128();

    // Place the value in the lowest lane:
    //   0 0 0 0 0 0 0 0 0 0 0 0 A R G B
    const __m128i packed = _mm_cvtsi32_si128(a);

    // Interleave with zero bytes, giving four 16-bit values:
    //   0 0 0 0 0 0 0 0 0 A 0 R 0 G 0 B
    const __m128i asWords = _mm_unpacklo_epi8(packed, zeros);

    // Interleave with zero words, giving four 32-bit values:
    //   0 0 0 A 0 0 0 R 0 0 0 G 0 0 0 B
    return _mm_unpacklo_epi16(asWords, zeros);
}
michael@0 39
michael@0 40 template<BlurDirection srcDirection, BlurDirection dstDirection>
michael@0 41 void SkBoxBlur_SSE2(const SkPMColor* src, int srcStride, SkPMColor* dst, int kernelSize,
michael@0 42 int leftOffset, int rightOffset, int width, int height)
michael@0 43 {
michael@0 44 const int rightBorder = SkMin32(rightOffset + 1, width);
michael@0 45 const int srcStrideX = srcDirection == kX ? 1 : srcStride;
michael@0 46 const int dstStrideX = dstDirection == kX ? 1 : height;
michael@0 47 const int srcStrideY = srcDirection == kX ? srcStride : 1;
michael@0 48 const int dstStrideY = dstDirection == kX ? width : 1;
michael@0 49 const __m128i scale = _mm_set1_epi32((1 << 24) / kernelSize);
michael@0 50 const __m128i half = _mm_set1_epi32(1 << 23);
michael@0 51 const __m128i zero = _mm_setzero_si128();
michael@0 52 for (int y = 0; y < height; ++y) {
michael@0 53 __m128i sum = zero;
michael@0 54 const SkPMColor* p = src;
michael@0 55 for (int i = 0; i < rightBorder; ++i) {
michael@0 56 sum = _mm_add_epi32(sum, expand(*p));
michael@0 57 p += srcStrideX;
michael@0 58 }
michael@0 59
michael@0 60 const SkPMColor* sptr = src;
michael@0 61 SkColor* dptr = dst;
michael@0 62 for (int x = 0; x < width; ++x) {
michael@0 63 #if 0
michael@0 64 // In SSE4.1, this would be
michael@0 65 __m128i result = _mm_mullo_epi32(sum, scale);
michael@0 66 #else
michael@0 67 // But SSE2 has no PMULLUD, so we must do AG and RB separately.
michael@0 68 __m128i tmp1 = _mm_mul_epu32(sum, scale);
michael@0 69 __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(sum, 4),
michael@0 70 _mm_srli_si128(scale, 4));
michael@0 71 __m128i result = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0,0,2,0)),
michael@0 72 _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0,0,2,0)));
michael@0 73 #endif
michael@0 74 // sumA*scale+.5 sumB*scale+.5 sumG*scale+.5 sumB*scale+.5
michael@0 75 result = _mm_add_epi32(result, half);
michael@0 76
michael@0 77 // 0 0 0 A 0 0 0 R 0 0 0 G 0 0 0 B
michael@0 78 result = _mm_srli_epi32(result, 24);
michael@0 79
michael@0 80 // 0 0 0 0 0 0 0 0 0 A 0 R 0 G 0 B
michael@0 81 result = _mm_packs_epi32(result, zero);
michael@0 82
michael@0 83 // 0 0 0 0 0 0 0 0 0 0 0 0 A R G B
michael@0 84 result = _mm_packus_epi16(result, zero);
michael@0 85 *dptr = _mm_cvtsi128_si32(result);
michael@0 86 if (x >= leftOffset) {
michael@0 87 SkColor l = *(sptr - leftOffset * srcStrideX);
michael@0 88 sum = _mm_sub_epi32(sum, expand(l));
michael@0 89 }
michael@0 90 if (x + rightOffset + 1 < width) {
michael@0 91 SkColor r = *(sptr + (rightOffset + 1) * srcStrideX);
michael@0 92 sum = _mm_add_epi32(sum, expand(r));
michael@0 93 }
michael@0 94 sptr += srcStrideX;
michael@0 95 if (srcDirection == kY) {
michael@0 96 _mm_prefetch(reinterpret_cast<const char*>(sptr + (rightOffset + 1) * srcStrideX),
michael@0 97 _MM_HINT_T0);
michael@0 98 }
michael@0 99 dptr += dstStrideX;
michael@0 100 }
michael@0 101 src += srcStrideY;
michael@0 102 dst += dstStrideY;
michael@0 103 }
michael@0 104 }
michael@0 105
michael@0 106 } // namespace
michael@0 107
michael@0 108 bool SkBoxBlurGetPlatformProcs_SSE2(SkBoxBlurProc* boxBlurX,
michael@0 109 SkBoxBlurProc* boxBlurY,
michael@0 110 SkBoxBlurProc* boxBlurXY,
michael@0 111 SkBoxBlurProc* boxBlurYX) {
michael@0 112 *boxBlurX = SkBoxBlur_SSE2<kX, kX>;
michael@0 113 *boxBlurY = SkBoxBlur_SSE2<kY, kY>;
michael@0 114 *boxBlurXY = SkBoxBlur_SSE2<kX, kY>;
michael@0 115 *boxBlurYX = SkBoxBlur_SSE2<kY, kX>;
michael@0 116 return true;
michael@0 117 }

mercurial