gfx/2d/Blur.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/2d/Blur.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,744 @@
     1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* vim: set ts=8 sts=2 et sw=2 tw=80: */
     1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +#include "Blur.h"
    1.11 +
    1.12 +#include <algorithm>
    1.13 +#include <math.h>
    1.14 +#include <string.h>
    1.15 +
    1.16 +#include "mozilla/CheckedInt.h"
    1.17 +#include "mozilla/Constants.h"
    1.18 +
    1.19 +#include "2D.h"
    1.20 +#include "DataSurfaceHelpers.h"
    1.21 +#include "Tools.h"
    1.22 +
    1.23 +using namespace std;
    1.24 +
    1.25 +namespace mozilla {
    1.26 +namespace gfx {
    1.27 +
    1.28 +/**
    1.29 + * Box blur involves looking at one pixel, and setting its value to the average
    1.30 + * of its neighbouring pixels.
    1.31 + * @param aInput The input buffer.
    1.32 + * @param aOutput The output buffer.
    1.33 + * @param aLeftLobe The number of pixels to blend on the left.
    1.34 + * @param aRightLobe The number of pixels to blend on the right.
    1.35 + * @param aWidth The number of columns in the buffers.
    1.36 + * @param aRows The number of rows in the buffers.
    1.37 + * @param aSkipRect An area to skip blurring in.
    1.38 + * XXX shouldn't we pass stride in separately here?
    1.39 + */
    1.40 +static void
    1.41 +BoxBlurHorizontal(unsigned char* aInput,
    1.42 +                  unsigned char* aOutput,
    1.43 +                  int32_t aLeftLobe,
    1.44 +                  int32_t aRightLobe,
    1.45 +                  int32_t aWidth,
    1.46 +                  int32_t aRows,
    1.47 +                  const IntRect& aSkipRect)
    1.48 +{
    1.49 +    MOZ_ASSERT(aWidth > 0);
    1.50 +
    1.51 +    int32_t boxSize = aLeftLobe + aRightLobe + 1;
    1.52 +    bool skipRectCoversWholeRow = 0 >= aSkipRect.x &&
    1.53 +                                  aWidth <= aSkipRect.XMost();
    1.54 +    if (boxSize == 1) {
    1.55 +        memcpy(aOutput, aInput, aWidth*aRows);
    1.56 +        return;
    1.57 +    }
    1.58 +    uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize);
    1.59 +
    1.60 +    for (int32_t y = 0; y < aRows; y++) {
    1.61 +        // Check whether the skip rect intersects this row. If the skip
    1.62 +        // rect covers the whole surface in this row, we can avoid
    1.63 +        // this row entirely (and any others along the skip rect).
    1.64 +        bool inSkipRectY = y >= aSkipRect.y &&
    1.65 +                           y < aSkipRect.YMost();
    1.66 +        if (inSkipRectY && skipRectCoversWholeRow) {
    1.67 +            y = aSkipRect.YMost() - 1;
    1.68 +            continue;
    1.69 +        }
    1.70 +
    1.71 +        uint32_t alphaSum = 0;
    1.72 +        for (int32_t i = 0; i < boxSize; i++) {
    1.73 +            int32_t pos = i - aLeftLobe;
    1.74 +            // See assertion above; if aWidth is zero, then we would have no
    1.75 +            // valid position to clamp to.
    1.76 +            pos = max(pos, 0);
    1.77 +            pos = min(pos, aWidth - 1);
    1.78 +            alphaSum += aInput[aWidth * y + pos];
    1.79 +        }
    1.80 +        for (int32_t x = 0; x < aWidth; x++) {
    1.81 +            // Check whether we are within the skip rect. If so, go
    1.82 +            // to the next point outside the skip rect.
    1.83 +            if (inSkipRectY && x >= aSkipRect.x &&
    1.84 +                x < aSkipRect.XMost()) {
    1.85 +                x = aSkipRect.XMost();
    1.86 +                if (x >= aWidth)
    1.87 +                    break;
    1.88 +
    1.89 +                // Recalculate the neighbouring alpha values for
    1.90 +                // our new point on the surface.
    1.91 +                alphaSum = 0;
    1.92 +                for (int32_t i = 0; i < boxSize; i++) {
    1.93 +                    int32_t pos = x + i - aLeftLobe;
    1.94 +                    // See assertion above; if aWidth is zero, then we would have no
    1.95 +                    // valid position to clamp to.
    1.96 +                    pos = max(pos, 0);
    1.97 +                    pos = min(pos, aWidth - 1);
    1.98 +                    alphaSum += aInput[aWidth * y + pos];
    1.99 +                }
   1.100 +            }
   1.101 +            int32_t tmp = x - aLeftLobe;
   1.102 +            int32_t last = max(tmp, 0);
   1.103 +            int32_t next = min(tmp + boxSize, aWidth - 1);
   1.104 +
   1.105 +            aOutput[aWidth * y + x] = (uint64_t(alphaSum) * reciprocal) >> 32;
   1.106 +
   1.107 +            alphaSum += aInput[aWidth * y + next] -
   1.108 +                        aInput[aWidth * y + last];
   1.109 +        }
   1.110 +    }
   1.111 +}
   1.112 +
   1.113 +/**
   1.114 + * Identical to BoxBlurHorizontal, except it blurs top and bottom instead of
   1.115 + * left and right.
   1.116 + * XXX shouldn't we pass stride in separately here?
   1.117 + */
   1.118 +static void
   1.119 +BoxBlurVertical(unsigned char* aInput,
   1.120 +                unsigned char* aOutput,
   1.121 +                int32_t aTopLobe,
   1.122 +                int32_t aBottomLobe,
   1.123 +                int32_t aWidth,
   1.124 +                int32_t aRows,
   1.125 +                const IntRect& aSkipRect)
   1.126 +{
   1.127 +    MOZ_ASSERT(aRows > 0);
   1.128 +
   1.129 +    int32_t boxSize = aTopLobe + aBottomLobe + 1;
   1.130 +    bool skipRectCoversWholeColumn = 0 >= aSkipRect.y &&
   1.131 +                                     aRows <= aSkipRect.YMost();
   1.132 +    if (boxSize == 1) {
   1.133 +        memcpy(aOutput, aInput, aWidth*aRows);
   1.134 +        return;
   1.135 +    }
   1.136 +    uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize);
   1.137 +
   1.138 +    for (int32_t x = 0; x < aWidth; x++) {
   1.139 +        bool inSkipRectX = x >= aSkipRect.x &&
   1.140 +                           x < aSkipRect.XMost();
   1.141 +        if (inSkipRectX && skipRectCoversWholeColumn) {
   1.142 +            x = aSkipRect.XMost() - 1;
   1.143 +            continue;
   1.144 +        }
   1.145 +
   1.146 +        uint32_t alphaSum = 0;
   1.147 +        for (int32_t i = 0; i < boxSize; i++) {
   1.148 +            int32_t pos = i - aTopLobe;
   1.149 +            // See assertion above; if aRows is zero, then we would have no
   1.150 +            // valid position to clamp to.
   1.151 +            pos = max(pos, 0);
   1.152 +            pos = min(pos, aRows - 1);
   1.153 +            alphaSum += aInput[aWidth * pos + x];
   1.154 +        }
   1.155 +        for (int32_t y = 0; y < aRows; y++) {
   1.156 +            if (inSkipRectX && y >= aSkipRect.y &&
   1.157 +                y < aSkipRect.YMost()) {
   1.158 +                y = aSkipRect.YMost();
   1.159 +                if (y >= aRows)
   1.160 +                    break;
   1.161 +
   1.162 +                alphaSum = 0;
   1.163 +                for (int32_t i = 0; i < boxSize; i++) {
   1.164 +                    int32_t pos = y + i - aTopLobe;
   1.165 +                    // See assertion above; if aRows is zero, then we would have no
   1.166 +                    // valid position to clamp to.
   1.167 +                    pos = max(pos, 0);
   1.168 +                    pos = min(pos, aRows - 1);
   1.169 +                    alphaSum += aInput[aWidth * pos + x];
   1.170 +                }
   1.171 +            }
   1.172 +            int32_t tmp = y - aTopLobe;
   1.173 +            int32_t last = max(tmp, 0);
   1.174 +            int32_t next = min(tmp + boxSize, aRows - 1);
   1.175 +
   1.176 +            aOutput[aWidth * y + x] = (uint64_t(alphaSum) * reciprocal) >> 32;
   1.177 +
   1.178 +            alphaSum += aInput[aWidth * next + x] -
   1.179 +                        aInput[aWidth * last + x];
   1.180 +        }
   1.181 +    }
   1.182 +}
   1.183 +
   1.184 +static void ComputeLobes(int32_t aRadius, int32_t aLobes[3][2])
   1.185 +{
   1.186 +    int32_t major, minor, final;
   1.187 +
   1.188 +    /* See http://www.w3.org/TR/SVG/filters.html#feGaussianBlur for
   1.189 +     * some notes about approximating the Gaussian blur with box-blurs.
   1.190 +     * The comments below are in the terminology of that page.
   1.191 +     */
   1.192 +    int32_t z = aRadius / 3;
   1.193 +    switch (aRadius % 3) {
   1.194 +    case 0:
   1.195 +        // aRadius = z*3; choose d = 2*z + 1
   1.196 +        major = minor = final = z;
   1.197 +        break;
   1.198 +    case 1:
   1.199 +        // aRadius = z*3 + 1
   1.200 +        // This is a tricky case since there is no value of d which will
   1.201 +        // yield a radius of exactly aRadius. If d is odd, i.e. d=2*k + 1
   1.202 +        // for some integer k, then the radius will be 3*k. If d is even,
   1.203 +        // i.e. d=2*k, then the radius will be 3*k - 1.
   1.204 +        // So we have to choose values that don't match the standard
   1.205 +        // algorithm.
   1.206 +        major = z + 1;
   1.207 +        minor = final = z;
   1.208 +        break;
   1.209 +    case 2:
   1.210 +        // aRadius = z*3 + 2; choose d = 2*z + 2
   1.211 +        major = final = z + 1;
   1.212 +        minor = z;
   1.213 +        break;
   1.214 +    default:
   1.215 +        // Mathematical impossibility!
   1.216 +        MOZ_ASSERT(false);
   1.217 +        major = minor = final = 0;
   1.218 +    }
   1.219 +    MOZ_ASSERT(major + minor + final == aRadius);
   1.220 +
   1.221 +    aLobes[0][0] = major;
   1.222 +    aLobes[0][1] = minor;
   1.223 +    aLobes[1][0] = minor;
   1.224 +    aLobes[1][1] = major;
   1.225 +    aLobes[2][0] = final;
   1.226 +    aLobes[2][1] = final;
   1.227 +}
   1.228 +
   1.229 +static void
   1.230 +SpreadHorizontal(unsigned char* aInput,
   1.231 +                 unsigned char* aOutput,
   1.232 +                 int32_t aRadius,
   1.233 +                 int32_t aWidth,
   1.234 +                 int32_t aRows,
   1.235 +                 int32_t aStride,
   1.236 +                 const IntRect& aSkipRect)
   1.237 +{
   1.238 +    if (aRadius == 0) {
   1.239 +        memcpy(aOutput, aInput, aStride * aRows);
   1.240 +        return;
   1.241 +    }
   1.242 +
   1.243 +    bool skipRectCoversWholeRow = 0 >= aSkipRect.x &&
   1.244 +                                    aWidth <= aSkipRect.XMost();
   1.245 +    for (int32_t y = 0; y < aRows; y++) {
   1.246 +        // Check whether the skip rect intersects this row. If the skip
   1.247 +        // rect covers the whole surface in this row, we can avoid
   1.248 +        // this row entirely (and any others along the skip rect).
   1.249 +        bool inSkipRectY = y >= aSkipRect.y &&
   1.250 +                             y < aSkipRect.YMost();
   1.251 +        if (inSkipRectY && skipRectCoversWholeRow) {
   1.252 +            y = aSkipRect.YMost() - 1;
   1.253 +            continue;
   1.254 +        }
   1.255 +
   1.256 +        for (int32_t x = 0; x < aWidth; x++) {
   1.257 +            // Check whether we are within the skip rect. If so, go
   1.258 +            // to the next point outside the skip rect.
   1.259 +            if (inSkipRectY && x >= aSkipRect.x &&
   1.260 +                x < aSkipRect.XMost()) {
   1.261 +                x = aSkipRect.XMost();
   1.262 +                if (x >= aWidth)
   1.263 +                    break;
   1.264 +            }
   1.265 +
   1.266 +            int32_t sMin = max(x - aRadius, 0);
   1.267 +            int32_t sMax = min(x + aRadius, aWidth - 1);
   1.268 +            int32_t v = 0;
   1.269 +            for (int32_t s = sMin; s <= sMax; ++s) {
   1.270 +                v = max<int32_t>(v, aInput[aStride * y + s]);
   1.271 +            }
   1.272 +            aOutput[aStride * y + x] = v;
   1.273 +        }
   1.274 +    }
   1.275 +}
   1.276 +
   1.277 +static void
   1.278 +SpreadVertical(unsigned char* aInput,
   1.279 +               unsigned char* aOutput,
   1.280 +               int32_t aRadius,
   1.281 +               int32_t aWidth,
   1.282 +               int32_t aRows,
   1.283 +               int32_t aStride,
   1.284 +               const IntRect& aSkipRect)
   1.285 +{
   1.286 +    if (aRadius == 0) {
   1.287 +        memcpy(aOutput, aInput, aStride * aRows);
   1.288 +        return;
   1.289 +    }
   1.290 +
   1.291 +    bool skipRectCoversWholeColumn = 0 >= aSkipRect.y &&
   1.292 +                                     aRows <= aSkipRect.YMost();
   1.293 +    for (int32_t x = 0; x < aWidth; x++) {
   1.294 +        bool inSkipRectX = x >= aSkipRect.x &&
   1.295 +                           x < aSkipRect.XMost();
   1.296 +        if (inSkipRectX && skipRectCoversWholeColumn) {
   1.297 +            x = aSkipRect.XMost() - 1;
   1.298 +            continue;
   1.299 +        }
   1.300 +
   1.301 +        for (int32_t y = 0; y < aRows; y++) {
   1.302 +            // Check whether we are within the skip rect. If so, go
   1.303 +            // to the next point outside the skip rect.
   1.304 +            if (inSkipRectX && y >= aSkipRect.y &&
   1.305 +                y < aSkipRect.YMost()) {
   1.306 +                y = aSkipRect.YMost();
   1.307 +                if (y >= aRows)
   1.308 +                    break;
   1.309 +            }
   1.310 +
   1.311 +            int32_t sMin = max(y - aRadius, 0);
   1.312 +            int32_t sMax = min(y + aRadius, aRows - 1);
   1.313 +            int32_t v = 0;
   1.314 +            for (int32_t s = sMin; s <= sMax; ++s) {
   1.315 +                v = max<int32_t>(v, aInput[aStride * s + x]);
   1.316 +            }
   1.317 +            aOutput[aStride * y + x] = v;
   1.318 +        }
   1.319 +    }
   1.320 +}
   1.321 +
   1.322 +CheckedInt<int32_t>
   1.323 +AlphaBoxBlur::RoundUpToMultipleOf4(int32_t aVal)
   1.324 +{
   1.325 +  CheckedInt<int32_t> val(aVal);
   1.326 +
   1.327 +  val += 3;
   1.328 +  val /= 4;
   1.329 +  val *= 4;
   1.330 +
   1.331 +  return val;
   1.332 +}
   1.333 +
   1.334 +AlphaBoxBlur::AlphaBoxBlur(const Rect& aRect,
   1.335 +                           const IntSize& aSpreadRadius,
   1.336 +                           const IntSize& aBlurRadius,
   1.337 +                           const Rect* aDirtyRect,
   1.338 +                           const Rect* aSkipRect)
   1.339 + : mSpreadRadius(aSpreadRadius),
   1.340 +   mBlurRadius(aBlurRadius),
   1.341 +   mSurfaceAllocationSize(0)
   1.342 +{
   1.343 +  Rect rect(aRect);
   1.344 +  rect.Inflate(Size(aBlurRadius + aSpreadRadius));
   1.345 +  rect.RoundOut();
   1.346 +
   1.347 +  if (aDirtyRect) {
   1.348 +    // If we get passed a dirty rect from layout, we can minimize the
   1.349 +    // shadow size and make painting faster.
   1.350 +    mHasDirtyRect = true;
   1.351 +    mDirtyRect = *aDirtyRect;
   1.352 +    Rect requiredBlurArea = mDirtyRect.Intersect(rect);
   1.353 +    requiredBlurArea.Inflate(Size(aBlurRadius + aSpreadRadius));
   1.354 +    rect = requiredBlurArea.Intersect(rect);
   1.355 +  } else {
   1.356 +    mHasDirtyRect = false;
   1.357 +  }
   1.358 +
   1.359 +  mRect = IntRect(int32_t(rect.x), int32_t(rect.y),
   1.360 +                  int32_t(rect.width), int32_t(rect.height));
   1.361 +  if (mRect.IsEmpty()) {
   1.362 +    return;
   1.363 +  }
   1.364 +
   1.365 +  if (aSkipRect) {
   1.366 +    // If we get passed a skip rect, we can lower the amount of
   1.367 +    // blurring/spreading we need to do. We convert it to IntRect to avoid
   1.368 +    // expensive int<->float conversions if we were to use Rect instead.
   1.369 +    Rect skipRect = *aSkipRect;
   1.370 +    skipRect.RoundIn();
   1.371 +    skipRect.Deflate(Size(aBlurRadius + aSpreadRadius));
   1.372 +    mSkipRect = IntRect(int32_t(skipRect.x), int32_t(skipRect.y),
   1.373 +                        int32_t(skipRect.width), int32_t(skipRect.height));
   1.374 +
   1.375 +    mSkipRect = mSkipRect.Intersect(mRect);
   1.376 +    if (mSkipRect.IsEqualInterior(mRect))
   1.377 +      return;
   1.378 +
   1.379 +    mSkipRect -= mRect.TopLeft();
   1.380 +  } else {
   1.381 +    mSkipRect = IntRect(0, 0, 0, 0);
   1.382 +  }
   1.383 +
   1.384 +  CheckedInt<int32_t> stride = RoundUpToMultipleOf4(mRect.width);
   1.385 +  if (stride.isValid()) {
   1.386 +    mStride = stride.value();
   1.387 +
   1.388 +    // We need to leave room for an additional 3 bytes for a potential overrun
   1.389 +    // in our blurring code.
   1.390 +    size_t size = BufferSizeFromStrideAndHeight(mStride, mRect.height, 3);
   1.391 +    if (size != 0) {
   1.392 +      mSurfaceAllocationSize = size;
   1.393 +    }
   1.394 +  }
   1.395 +}
   1.396 +
   1.397 +AlphaBoxBlur::AlphaBoxBlur(const Rect& aRect,
   1.398 +                           int32_t aStride,
   1.399 +                           float aSigmaX,
   1.400 +                           float aSigmaY)
   1.401 +  : mRect(int32_t(aRect.x), int32_t(aRect.y),
   1.402 +          int32_t(aRect.width), int32_t(aRect.height)),
   1.403 +    mSpreadRadius(),
   1.404 +    mBlurRadius(CalculateBlurRadius(Point(aSigmaX, aSigmaY))),
   1.405 +    mStride(aStride),
   1.406 +    mSurfaceAllocationSize(0)
   1.407 +{
   1.408 +  IntRect intRect;
   1.409 +  if (aRect.ToIntRect(&intRect)) {
   1.410 +    size_t minDataSize = BufferSizeFromStrideAndHeight(intRect.width, intRect.height);
   1.411 +    if (minDataSize != 0) {
   1.412 +      mSurfaceAllocationSize = minDataSize;
   1.413 +    }
   1.414 +  }
   1.415 +}
   1.416 +
   1.417 +
   1.418 +AlphaBoxBlur::~AlphaBoxBlur()
   1.419 +{
   1.420 +}
   1.421 +
   1.422 +IntSize
   1.423 +AlphaBoxBlur::GetSize()
   1.424 +{
   1.425 +  IntSize size(mRect.width, mRect.height);
   1.426 +  return size;
   1.427 +}
   1.428 +
   1.429 +int32_t
   1.430 +AlphaBoxBlur::GetStride()
   1.431 +{
   1.432 +  return mStride;
   1.433 +}
   1.434 +
   1.435 +IntRect
   1.436 +AlphaBoxBlur::GetRect()
   1.437 +{
   1.438 +  return mRect;
   1.439 +}
   1.440 +
   1.441 +Rect*
   1.442 +AlphaBoxBlur::GetDirtyRect()
   1.443 +{
   1.444 +  if (mHasDirtyRect) {
   1.445 +    return &mDirtyRect;
   1.446 +  }
   1.447 +
   1.448 +  return nullptr;
   1.449 +}
   1.450 +
   1.451 +size_t
   1.452 +AlphaBoxBlur::GetSurfaceAllocationSize() const
   1.453 +{
   1.454 +  return mSurfaceAllocationSize;
   1.455 +}
   1.456 +
   1.457 +void
   1.458 +AlphaBoxBlur::Blur(uint8_t* aData)
   1.459 +{
   1.460 +  if (!aData) {
   1.461 +    return;
   1.462 +  }
   1.463 +
   1.464 +  // no need to do all this if not blurring or spreading
   1.465 +  if (mBlurRadius != IntSize(0,0) || mSpreadRadius != IntSize(0,0)) {
   1.466 +    int32_t stride = GetStride();
   1.467 +
   1.468 +    IntSize size = GetSize();
   1.469 +
   1.470 +    if (mSpreadRadius.width > 0 || mSpreadRadius.height > 0) {
   1.471 +      // No need to use CheckedInt here - we have validated it in the constructor.
   1.472 +      size_t szB = stride * size.height;
   1.473 +      unsigned char* tmpData = new (std::nothrow) uint8_t[szB];
   1.474 +
   1.475 +      if (!tmpData) {
   1.476 +        return;
   1.477 +      }
   1.478 +
   1.479 +      memset(tmpData, 0, szB);
   1.480 +
   1.481 +      SpreadHorizontal(aData, tmpData, mSpreadRadius.width, GetSize().width, GetSize().height, stride, mSkipRect);
   1.482 +      SpreadVertical(tmpData, aData, mSpreadRadius.height, GetSize().width, GetSize().height, stride, mSkipRect);
   1.483 +
   1.484 +      delete [] tmpData;
   1.485 +    }
   1.486 +
   1.487 +    int32_t horizontalLobes[3][2];
   1.488 +    ComputeLobes(mBlurRadius.width, horizontalLobes);
   1.489 +    int32_t verticalLobes[3][2];
   1.490 +    ComputeLobes(mBlurRadius.height, verticalLobes);
   1.491 +
   1.492 +    // We want to allow for some extra space on the left for alignment reasons.
   1.493 +    int32_t maxLeftLobe = RoundUpToMultipleOf4(horizontalLobes[0][0] + 1).value();
   1.494 +
   1.495 +    IntSize integralImageSize(size.width + maxLeftLobe + horizontalLobes[1][1],
   1.496 +                              size.height + verticalLobes[0][0] + verticalLobes[1][1] + 1);
   1.497 +
   1.498 +    if ((integralImageSize.width * integralImageSize.height) > (1 << 24)) {
   1.499 +      // Fallback to old blurring code when the surface is so large it may
   1.500 +      // overflow our integral image!
   1.501 +
   1.502 +      // No need to use CheckedInt here - we have validated it in the constructor.
   1.503 +      size_t szB = stride * size.height;
   1.504 +      uint8_t* tmpData = new (std::nothrow) uint8_t[szB];
   1.505 +      if (!tmpData) {
   1.506 +        return;
   1.507 +      }
   1.508 +
   1.509 +      memset(tmpData, 0, szB);
   1.510 +
   1.511 +      uint8_t* a = aData;
   1.512 +      uint8_t* b = tmpData;
   1.513 +      if (mBlurRadius.width > 0) {
   1.514 +        BoxBlurHorizontal(a, b, horizontalLobes[0][0], horizontalLobes[0][1], stride, GetSize().height, mSkipRect);
   1.515 +        BoxBlurHorizontal(b, a, horizontalLobes[1][0], horizontalLobes[1][1], stride, GetSize().height, mSkipRect);
   1.516 +        BoxBlurHorizontal(a, b, horizontalLobes[2][0], horizontalLobes[2][1], stride, GetSize().height, mSkipRect);
   1.517 +      } else {
   1.518 +        a = tmpData;
   1.519 +        b = aData;
   1.520 +      }
   1.521 +      // The result is in 'b' here.
   1.522 +      if (mBlurRadius.height > 0) {
   1.523 +        BoxBlurVertical(b, a, verticalLobes[0][0], verticalLobes[0][1], stride, GetSize().height, mSkipRect);
   1.524 +        BoxBlurVertical(a, b, verticalLobes[1][0], verticalLobes[1][1], stride, GetSize().height, mSkipRect);
   1.525 +        BoxBlurVertical(b, a, verticalLobes[2][0], verticalLobes[2][1], stride, GetSize().height, mSkipRect);
   1.526 +      } else {
   1.527 +        a = b;
   1.528 +      }
   1.529 +      // The result is in 'a' here.
   1.530 +      if (a == tmpData) {
   1.531 +        memcpy(aData, tmpData, szB);
   1.532 +      }
   1.533 +      delete [] tmpData;
   1.534 +    } else {
   1.535 +      size_t integralImageStride = GetAlignedStride<16>(integralImageSize.width * 4);
   1.536 +
   1.537 +      // We need to leave room for an additional 12 bytes for a maximum overrun
   1.538 +      // of 3 pixels in the blurring code.
   1.539 +      size_t bufLen = BufferSizeFromStrideAndHeight(integralImageStride, integralImageSize.height, 12);
   1.540 +      if (bufLen == 0) {
   1.541 +        return;
   1.542 +      }
   1.543 +      // bufLen is a byte count, but here we want a multiple of 32-bit ints, so
   1.544 +      // we divide by 4.
   1.545 +      AlignedArray<uint32_t> integralImage((bufLen / 4) + ((bufLen % 4) ? 1 : 0));
   1.546 +
   1.547 +      if (!integralImage) {
   1.548 +        return;
   1.549 +      }
   1.550 +#ifdef USE_SSE2
   1.551 +      if (Factory::HasSSE2()) {
   1.552 +        BoxBlur_SSE2(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
   1.553 +                     verticalLobes[0][1], integralImage, integralImageStride);
   1.554 +        BoxBlur_SSE2(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
   1.555 +                     verticalLobes[1][1], integralImage, integralImageStride);
   1.556 +        BoxBlur_SSE2(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
   1.557 +                     verticalLobes[2][1], integralImage, integralImageStride);
   1.558 +      } else
   1.559 +#endif
   1.560 +      {
   1.561 +        BoxBlur_C(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
   1.562 +                  verticalLobes[0][1], integralImage, integralImageStride);
   1.563 +        BoxBlur_C(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
   1.564 +                  verticalLobes[1][1], integralImage, integralImageStride);
   1.565 +        BoxBlur_C(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
   1.566 +                  verticalLobes[2][1], integralImage, integralImageStride);
   1.567 +      }
   1.568 +    }
   1.569 +  }
   1.570 +}
   1.571 +
   1.572 +MOZ_ALWAYS_INLINE void
   1.573 +GenerateIntegralRow(uint32_t  *aDest, const uint8_t *aSource, uint32_t *aPreviousRow,
   1.574 +                    const uint32_t &aSourceWidth, const uint32_t &aLeftInflation, const uint32_t &aRightInflation)
   1.575 +{
   1.576 +  uint32_t currentRowSum = 0;
   1.577 +  uint32_t pixel = aSource[0];
   1.578 +  for (uint32_t x = 0; x < aLeftInflation; x++) {
   1.579 +    currentRowSum += pixel;
   1.580 +    *aDest++ = currentRowSum + *aPreviousRow++;
   1.581 +  }
   1.582 +  for (uint32_t x = aLeftInflation; x < (aSourceWidth + aLeftInflation); x += 4) {
   1.583 +      uint32_t alphaValues = *(uint32_t*)(aSource + (x - aLeftInflation));
   1.584 +#if defined WORDS_BIGENDIAN || defined IS_BIG_ENDIAN || defined __BIG_ENDIAN__
   1.585 +      currentRowSum += (alphaValues >> 24) & 0xff;
   1.586 +      *aDest++ = *aPreviousRow++ + currentRowSum;
   1.587 +      currentRowSum += (alphaValues >> 16) & 0xff;
   1.588 +      *aDest++ = *aPreviousRow++ + currentRowSum;
   1.589 +      currentRowSum += (alphaValues >> 8) & 0xff;
   1.590 +      *aDest++ = *aPreviousRow++ + currentRowSum;
   1.591 +      currentRowSum += alphaValues & 0xff;
   1.592 +      *aDest++ = *aPreviousRow++ + currentRowSum;
   1.593 +#else
   1.594 +      currentRowSum += alphaValues & 0xff;
   1.595 +      *aDest++ = *aPreviousRow++ + currentRowSum;
   1.596 +      alphaValues >>= 8;
   1.597 +      currentRowSum += alphaValues & 0xff;
   1.598 +      *aDest++ = *aPreviousRow++ + currentRowSum;
   1.599 +      alphaValues >>= 8;
   1.600 +      currentRowSum += alphaValues & 0xff;
   1.601 +      *aDest++ = *aPreviousRow++ + currentRowSum;
   1.602 +      alphaValues >>= 8;
   1.603 +      currentRowSum += alphaValues & 0xff;
   1.604 +      *aDest++ = *aPreviousRow++ + currentRowSum;
   1.605 +#endif
   1.606 +  }
   1.607 +  pixel = aSource[aSourceWidth - 1];
   1.608 +  for (uint32_t x = (aSourceWidth + aLeftInflation); x < (aSourceWidth + aLeftInflation + aRightInflation); x++) {
   1.609 +    currentRowSum += pixel;
   1.610 +    *aDest++ = currentRowSum + *aPreviousRow++;
   1.611 +  }
   1.612 +}
   1.613 +
   1.614 +MOZ_ALWAYS_INLINE void
   1.615 +GenerateIntegralImage_C(int32_t aLeftInflation, int32_t aRightInflation,
   1.616 +                        int32_t aTopInflation, int32_t aBottomInflation,
   1.617 +                        uint32_t *aIntegralImage, size_t aIntegralImageStride,
   1.618 +                        uint8_t *aSource, int32_t aSourceStride, const IntSize &aSize)
   1.619 +{
   1.620 +  uint32_t stride32bit = aIntegralImageStride / 4;
   1.621 +
   1.622 +  IntSize integralImageSize(aSize.width + aLeftInflation + aRightInflation,
   1.623 +                            aSize.height + aTopInflation + aBottomInflation);
   1.624 +
   1.625 +  memset(aIntegralImage, 0, aIntegralImageStride);
   1.626 +
   1.627 +  GenerateIntegralRow(aIntegralImage, aSource, aIntegralImage,
   1.628 +                      aSize.width, aLeftInflation, aRightInflation);
   1.629 +  for (int y = 1; y < aTopInflation + 1; y++) {
   1.630 +    GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource, aIntegralImage + (y - 1) * stride32bit,
   1.631 +                        aSize.width, aLeftInflation, aRightInflation);
   1.632 +  }
   1.633 +
   1.634 +  for (int y = aTopInflation + 1; y < (aSize.height + aTopInflation); y++) {
   1.635 +    GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource + aSourceStride * (y - aTopInflation),
   1.636 +                        aIntegralImage + (y - 1) * stride32bit, aSize.width, aLeftInflation, aRightInflation);
   1.637 +  }
   1.638 +
   1.639 +  if (aBottomInflation) {
   1.640 +    for (int y = (aSize.height + aTopInflation); y < integralImageSize.height; y++) {
   1.641 +      GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource + ((aSize.height - 1) * aSourceStride),
   1.642 +                          aIntegralImage + (y - 1) * stride32bit,
   1.643 +                          aSize.width, aLeftInflation, aRightInflation);
   1.644 +    }
   1.645 +  }
   1.646 +}
   1.647 +
   1.648 +/**
   1.649 + * Attempt to do an in-place box blur using an integral image.
   1.650 + */
   1.651 +void
   1.652 +AlphaBoxBlur::BoxBlur_C(uint8_t* aData,
   1.653 +                        int32_t aLeftLobe,
   1.654 +                        int32_t aRightLobe,
   1.655 +                        int32_t aTopLobe,
   1.656 +                        int32_t aBottomLobe,
   1.657 +                        uint32_t *aIntegralImage,
   1.658 +                        size_t aIntegralImageStride)
   1.659 +{
   1.660 +  IntSize size = GetSize();
   1.661 +
   1.662 +  MOZ_ASSERT(size.width > 0);
   1.663 +
   1.664 +  // Our 'left' or 'top' lobe will include the current pixel. i.e. when
   1.665 +  // looking at an integral image the value of a pixel at 'x,y' is calculated
   1.666 +  // using the value of the integral image values above/below that.
   1.667 +  aLeftLobe++;
   1.668 +  aTopLobe++;
   1.669 +  int32_t boxSize = (aLeftLobe + aRightLobe) * (aTopLobe + aBottomLobe);
   1.670 +
   1.671 +  MOZ_ASSERT(boxSize > 0);
   1.672 +
   1.673 +  if (boxSize == 1) {
   1.674 +      return;
   1.675 +  }
   1.676 +
   1.677 +  int32_t stride32bit = aIntegralImageStride / 4;
   1.678 +
   1.679 +  int32_t leftInflation = RoundUpToMultipleOf4(aLeftLobe).value();
   1.680 +
   1.681 +  GenerateIntegralImage_C(leftInflation, aRightLobe, aTopLobe, aBottomLobe,
   1.682 +                          aIntegralImage, aIntegralImageStride, aData,
   1.683 +                          mStride, size);
   1.684 +
   1.685 +  uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize);
   1.686 +
   1.687 +  uint32_t *innerIntegral = aIntegralImage + (aTopLobe * stride32bit) + leftInflation;
   1.688 +
   1.689 +  // Storing these locally makes this about 30% faster! Presumably the compiler
   1.690 +  // can't be sure we're not altering the member variables in this loop.
   1.691 +  IntRect skipRect = mSkipRect;
   1.692 +  uint8_t *data = aData;
   1.693 +  int32_t stride = mStride;
   1.694 +  for (int32_t y = 0; y < size.height; y++) {
   1.695 +    bool inSkipRectY = y > skipRect.y && y < skipRect.YMost();
   1.696 +
   1.697 +    uint32_t *topLeftBase = innerIntegral + ((y - aTopLobe) * stride32bit - aLeftLobe);
   1.698 +    uint32_t *topRightBase = innerIntegral + ((y - aTopLobe) * stride32bit + aRightLobe);
   1.699 +    uint32_t *bottomRightBase = innerIntegral + ((y + aBottomLobe) * stride32bit + aRightLobe);
   1.700 +    uint32_t *bottomLeftBase = innerIntegral + ((y + aBottomLobe) * stride32bit - aLeftLobe);
   1.701 +
   1.702 +    for (int32_t x = 0; x < size.width; x++) {
   1.703 +      if (inSkipRectY && x > skipRect.x && x < skipRect.XMost()) {
   1.704 +        x = skipRect.XMost() - 1;
   1.705 +        // Trigger early jump on coming loop iterations, this will be reset
   1.706 +        // next line anyway.
   1.707 +        inSkipRectY = false;
   1.708 +        continue;
   1.709 +      }
   1.710 +      int32_t topLeft = topLeftBase[x];
   1.711 +      int32_t topRight = topRightBase[x];
   1.712 +      int32_t bottomRight = bottomRightBase[x];
   1.713 +      int32_t bottomLeft = bottomLeftBase[x];
   1.714 +
   1.715 +      uint32_t value = bottomRight - topRight - bottomLeft;
   1.716 +      value += topLeft;
   1.717 +
   1.718 +      data[stride * y + x] = (uint64_t(reciprocal) * value + (uint64_t(1) << 31)) >> 32;
   1.719 +    }
   1.720 +  }
   1.721 +}
   1.722 +
   1.723 +/**
   1.724 + * Compute the box blur size (which we're calling the blur radius) from
   1.725 + * the standard deviation.
   1.726 + *
   1.727 + * Much of this, the 3 * sqrt(2 * pi) / 4, is the known value for
   1.728 + * approximating a Gaussian using box blurs.  This yields quite a good
   1.729 + * approximation for a Gaussian.  Then we multiply this by 1.5 since our
   1.730 + * code wants the radius of the entire triple-box-blur kernel instead of
   1.731 + * the diameter of an individual box blur.  For more details, see:
   1.732 + *   http://www.w3.org/TR/SVG11/filters.html#feGaussianBlurElement
   1.733 + *   https://bugzilla.mozilla.org/show_bug.cgi?id=590039#c19
   1.734 + */
   1.735 +static const Float GAUSSIAN_SCALE_FACTOR = Float((3 * sqrt(2 * M_PI) / 4) * 1.5);
   1.736 +
   1.737 +IntSize
   1.738 +AlphaBoxBlur::CalculateBlurRadius(const Point& aStd)
   1.739 +{
   1.740 +    IntSize size(static_cast<int32_t>(floor(aStd.x * GAUSSIAN_SCALE_FACTOR + 0.5)),
   1.741 +                 static_cast<int32_t>(floor(aStd.y * GAUSSIAN_SCALE_FACTOR + 0.5)));
   1.742 +
   1.743 +    return size;
   1.744 +}
   1.745 +
   1.746 +}
   1.747 +}

mercurial