1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/gfx/2d/Blur.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,744 @@ 1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ 1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 + 1.10 +#include "Blur.h" 1.11 + 1.12 +#include <algorithm> 1.13 +#include <math.h> 1.14 +#include <string.h> 1.15 + 1.16 +#include "mozilla/CheckedInt.h" 1.17 +#include "mozilla/Constants.h" 1.18 + 1.19 +#include "2D.h" 1.20 +#include "DataSurfaceHelpers.h" 1.21 +#include "Tools.h" 1.22 + 1.23 +using namespace std; 1.24 + 1.25 +namespace mozilla { 1.26 +namespace gfx { 1.27 + 1.28 +/** 1.29 + * Box blur involves looking at one pixel, and setting its value to the average 1.30 + * of its neighbouring pixels. 1.31 + * @param aInput The input buffer. 1.32 + * @param aOutput The output buffer. 1.33 + * @param aLeftLobe The number of pixels to blend on the left. 1.34 + * @param aRightLobe The number of pixels to blend on the right. 1.35 + * @param aWidth The number of columns in the buffers. 1.36 + * @param aRows The number of rows in the buffers. 1.37 + * @param aSkipRect An area to skip blurring in. 1.38 + * XXX shouldn't we pass stride in separately here? 1.39 + */ 1.40 +static void 1.41 +BoxBlurHorizontal(unsigned char* aInput, 1.42 + unsigned char* aOutput, 1.43 + int32_t aLeftLobe, 1.44 + int32_t aRightLobe, 1.45 + int32_t aWidth, 1.46 + int32_t aRows, 1.47 + const IntRect& aSkipRect) 1.48 +{ 1.49 + MOZ_ASSERT(aWidth > 0); 1.50 + 1.51 + int32_t boxSize = aLeftLobe + aRightLobe + 1; 1.52 + bool skipRectCoversWholeRow = 0 >= aSkipRect.x && 1.53 + aWidth <= aSkipRect.XMost(); 1.54 + if (boxSize == 1) { 1.55 + memcpy(aOutput, aInput, aWidth*aRows); 1.56 + return; 1.57 + } 1.58 + uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize); 1.59 + 1.60 + for (int32_t y = 0; y < aRows; y++) { 1.61 + // Check whether the skip rect intersects this row. If the skip 1.62 + // rect covers the whole surface in this row, we can avoid 1.63 + // this row entirely (and any others along the skip rect). 1.64 + bool inSkipRectY = y >= aSkipRect.y && 1.65 + y < aSkipRect.YMost(); 1.66 + if (inSkipRectY && skipRectCoversWholeRow) { 1.67 + y = aSkipRect.YMost() - 1; 1.68 + continue; 1.69 + } 1.70 + 1.71 + uint32_t alphaSum = 0; 1.72 + for (int32_t i = 0; i < boxSize; i++) { 1.73 + int32_t pos = i - aLeftLobe; 1.74 + // See assertion above; if aWidth is zero, then we would have no 1.75 + // valid position to clamp to. 1.76 + pos = max(pos, 0); 1.77 + pos = min(pos, aWidth - 1); 1.78 + alphaSum += aInput[aWidth * y + pos]; 1.79 + } 1.80 + for (int32_t x = 0; x < aWidth; x++) { 1.81 + // Check whether we are within the skip rect. If so, go 1.82 + // to the next point outside the skip rect. 1.83 + if (inSkipRectY && x >= aSkipRect.x && 1.84 + x < aSkipRect.XMost()) { 1.85 + x = aSkipRect.XMost(); 1.86 + if (x >= aWidth) 1.87 + break; 1.88 + 1.89 + // Recalculate the neighbouring alpha values for 1.90 + // our new point on the surface. 1.91 + alphaSum = 0; 1.92 + for (int32_t i = 0; i < boxSize; i++) { 1.93 + int32_t pos = x + i - aLeftLobe; 1.94 + // See assertion above; if aWidth is zero, then we would have no 1.95 + // valid position to clamp to. 1.96 + pos = max(pos, 0); 1.97 + pos = min(pos, aWidth - 1); 1.98 + alphaSum += aInput[aWidth * y + pos]; 1.99 + } 1.100 + } 1.101 + int32_t tmp = x - aLeftLobe; 1.102 + int32_t last = max(tmp, 0); 1.103 + int32_t next = min(tmp + boxSize, aWidth - 1); 1.104 + 1.105 + aOutput[aWidth * y + x] = (uint64_t(alphaSum) * reciprocal) >> 32; 1.106 + 1.107 + alphaSum += aInput[aWidth * y + next] - 1.108 + aInput[aWidth * y + last]; 1.109 + } 1.110 + } 1.111 +} 1.112 + 1.113 +/** 1.114 + * Identical to BoxBlurHorizontal, except it blurs top and bottom instead of 1.115 + * left and right. 1.116 + * XXX shouldn't we pass stride in separately here? 1.117 + */ 1.118 +static void 1.119 +BoxBlurVertical(unsigned char* aInput, 1.120 + unsigned char* aOutput, 1.121 + int32_t aTopLobe, 1.122 + int32_t aBottomLobe, 1.123 + int32_t aWidth, 1.124 + int32_t aRows, 1.125 + const IntRect& aSkipRect) 1.126 +{ 1.127 + MOZ_ASSERT(aRows > 0); 1.128 + 1.129 + int32_t boxSize = aTopLobe + aBottomLobe + 1; 1.130 + bool skipRectCoversWholeColumn = 0 >= aSkipRect.y && 1.131 + aRows <= aSkipRect.YMost(); 1.132 + if (boxSize == 1) { 1.133 + memcpy(aOutput, aInput, aWidth*aRows); 1.134 + return; 1.135 + } 1.136 + uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize); 1.137 + 1.138 + for (int32_t x = 0; x < aWidth; x++) { 1.139 + bool inSkipRectX = x >= aSkipRect.x && 1.140 + x < aSkipRect.XMost(); 1.141 + if (inSkipRectX && skipRectCoversWholeColumn) { 1.142 + x = aSkipRect.XMost() - 1; 1.143 + continue; 1.144 + } 1.145 + 1.146 + uint32_t alphaSum = 0; 1.147 + for (int32_t i = 0; i < boxSize; i++) { 1.148 + int32_t pos = i - aTopLobe; 1.149 + // See assertion above; if aRows is zero, then we would have no 1.150 + // valid position to clamp to. 1.151 + pos = max(pos, 0); 1.152 + pos = min(pos, aRows - 1); 1.153 + alphaSum += aInput[aWidth * pos + x]; 1.154 + } 1.155 + for (int32_t y = 0; y < aRows; y++) { 1.156 + if (inSkipRectX && y >= aSkipRect.y && 1.157 + y < aSkipRect.YMost()) { 1.158 + y = aSkipRect.YMost(); 1.159 + if (y >= aRows) 1.160 + break; 1.161 + 1.162 + alphaSum = 0; 1.163 + for (int32_t i = 0; i < boxSize; i++) { 1.164 + int32_t pos = y + i - aTopLobe; 1.165 + // See assertion above; if aRows is zero, then we would have no 1.166 + // valid position to clamp to. 1.167 + pos = max(pos, 0); 1.168 + pos = min(pos, aRows - 1); 1.169 + alphaSum += aInput[aWidth * pos + x]; 1.170 + } 1.171 + } 1.172 + int32_t tmp = y - aTopLobe; 1.173 + int32_t last = max(tmp, 0); 1.174 + int32_t next = min(tmp + boxSize, aRows - 1); 1.175 + 1.176 + aOutput[aWidth * y + x] = (uint64_t(alphaSum) * reciprocal) >> 32; 1.177 + 1.178 + alphaSum += aInput[aWidth * next + x] - 1.179 + aInput[aWidth * last + x]; 1.180 + } 1.181 + } 1.182 +} 1.183 + 1.184 +static void ComputeLobes(int32_t aRadius, int32_t aLobes[3][2]) 1.185 +{ 1.186 + int32_t major, minor, final; 1.187 + 1.188 + /* See http://www.w3.org/TR/SVG/filters.html#feGaussianBlur for 1.189 + * some notes about approximating the Gaussian blur with box-blurs. 1.190 + * The comments below are in the terminology of that page. 1.191 + */ 1.192 + int32_t z = aRadius / 3; 1.193 + switch (aRadius % 3) { 1.194 + case 0: 1.195 + // aRadius = z*3; choose d = 2*z + 1 1.196 + major = minor = final = z; 1.197 + break; 1.198 + case 1: 1.199 + // aRadius = z*3 + 1 1.200 + // This is a tricky case since there is no value of d which will 1.201 + // yield a radius of exactly aRadius. If d is odd, i.e. d=2*k + 1 1.202 + // for some integer k, then the radius will be 3*k. If d is even, 1.203 + // i.e. d=2*k, then the radius will be 3*k - 1. 1.204 + // So we have to choose values that don't match the standard 1.205 + // algorithm. 1.206 + major = z + 1; 1.207 + minor = final = z; 1.208 + break; 1.209 + case 2: 1.210 + // aRadius = z*3 + 2; choose d = 2*z + 2 1.211 + major = final = z + 1; 1.212 + minor = z; 1.213 + break; 1.214 + default: 1.215 + // Mathematical impossibility! 1.216 + MOZ_ASSERT(false); 1.217 + major = minor = final = 0; 1.218 + } 1.219 + MOZ_ASSERT(major + minor + final == aRadius); 1.220 + 1.221 + aLobes[0][0] = major; 1.222 + aLobes[0][1] = minor; 1.223 + aLobes[1][0] = minor; 1.224 + aLobes[1][1] = major; 1.225 + aLobes[2][0] = final; 1.226 + aLobes[2][1] = final; 1.227 +} 1.228 + 1.229 +static void 1.230 +SpreadHorizontal(unsigned char* aInput, 1.231 + unsigned char* aOutput, 1.232 + int32_t aRadius, 1.233 + int32_t aWidth, 1.234 + int32_t aRows, 1.235 + int32_t aStride, 1.236 + const IntRect& aSkipRect) 1.237 +{ 1.238 + if (aRadius == 0) { 1.239 + memcpy(aOutput, aInput, aStride * aRows); 1.240 + return; 1.241 + } 1.242 + 1.243 + bool skipRectCoversWholeRow = 0 >= aSkipRect.x && 1.244 + aWidth <= aSkipRect.XMost(); 1.245 + for (int32_t y = 0; y < aRows; y++) { 1.246 + // Check whether the skip rect intersects this row. If the skip 1.247 + // rect covers the whole surface in this row, we can avoid 1.248 + // this row entirely (and any others along the skip rect). 1.249 + bool inSkipRectY = y >= aSkipRect.y && 1.250 + y < aSkipRect.YMost(); 1.251 + if (inSkipRectY && skipRectCoversWholeRow) { 1.252 + y = aSkipRect.YMost() - 1; 1.253 + continue; 1.254 + } 1.255 + 1.256 + for (int32_t x = 0; x < aWidth; x++) { 1.257 + // Check whether we are within the skip rect. If so, go 1.258 + // to the next point outside the skip rect. 1.259 + if (inSkipRectY && x >= aSkipRect.x && 1.260 + x < aSkipRect.XMost()) { 1.261 + x = aSkipRect.XMost(); 1.262 + if (x >= aWidth) 1.263 + break; 1.264 + } 1.265 + 1.266 + int32_t sMin = max(x - aRadius, 0); 1.267 + int32_t sMax = min(x + aRadius, aWidth - 1); 1.268 + int32_t v = 0; 1.269 + for (int32_t s = sMin; s <= sMax; ++s) { 1.270 + v = max<int32_t>(v, aInput[aStride * y + s]); 1.271 + } 1.272 + aOutput[aStride * y + x] = v; 1.273 + } 1.274 + } 1.275 +} 1.276 + 1.277 +static void 1.278 +SpreadVertical(unsigned char* aInput, 1.279 + unsigned char* aOutput, 1.280 + int32_t aRadius, 1.281 + int32_t aWidth, 1.282 + int32_t aRows, 1.283 + int32_t aStride, 1.284 + const IntRect& aSkipRect) 1.285 +{ 1.286 + if (aRadius == 0) { 1.287 + memcpy(aOutput, aInput, aStride * aRows); 1.288 + return; 1.289 + } 1.290 + 1.291 + bool skipRectCoversWholeColumn = 0 >= aSkipRect.y && 1.292 + aRows <= aSkipRect.YMost(); 1.293 + for (int32_t x = 0; x < aWidth; x++) { 1.294 + bool inSkipRectX = x >= aSkipRect.x && 1.295 + x < aSkipRect.XMost(); 1.296 + if (inSkipRectX && skipRectCoversWholeColumn) { 1.297 + x = aSkipRect.XMost() - 1; 1.298 + continue; 1.299 + } 1.300 + 1.301 + for (int32_t y = 0; y < aRows; y++) { 1.302 + // Check whether we are within the skip rect. If so, go 1.303 + // to the next point outside the skip rect. 1.304 + if (inSkipRectX && y >= aSkipRect.y && 1.305 + y < aSkipRect.YMost()) { 1.306 + y = aSkipRect.YMost(); 1.307 + if (y >= aRows) 1.308 + break; 1.309 + } 1.310 + 1.311 + int32_t sMin = max(y - aRadius, 0); 1.312 + int32_t sMax = min(y + aRadius, aRows - 1); 1.313 + int32_t v = 0; 1.314 + for (int32_t s = sMin; s <= sMax; ++s) { 1.315 + v = max<int32_t>(v, aInput[aStride * s + x]); 1.316 + } 1.317 + aOutput[aStride * y + x] = v; 1.318 + } 1.319 + } 1.320 +} 1.321 + 1.322 +CheckedInt<int32_t> 1.323 +AlphaBoxBlur::RoundUpToMultipleOf4(int32_t aVal) 1.324 +{ 1.325 + CheckedInt<int32_t> val(aVal); 1.326 + 1.327 + val += 3; 1.328 + val /= 4; 1.329 + val *= 4; 1.330 + 1.331 + return val; 1.332 +} 1.333 + 1.334 +AlphaBoxBlur::AlphaBoxBlur(const Rect& aRect, 1.335 + const IntSize& aSpreadRadius, 1.336 + const IntSize& aBlurRadius, 1.337 + const Rect* aDirtyRect, 1.338 + const Rect* aSkipRect) 1.339 + : mSpreadRadius(aSpreadRadius), 1.340 + mBlurRadius(aBlurRadius), 1.341 + mSurfaceAllocationSize(0) 1.342 +{ 1.343 + Rect rect(aRect); 1.344 + rect.Inflate(Size(aBlurRadius + aSpreadRadius)); 1.345 + rect.RoundOut(); 1.346 + 1.347 + if (aDirtyRect) { 1.348 + // If we get passed a dirty rect from layout, we can minimize the 1.349 + // shadow size and make painting faster. 1.350 + mHasDirtyRect = true; 1.351 + mDirtyRect = *aDirtyRect; 1.352 + Rect requiredBlurArea = mDirtyRect.Intersect(rect); 1.353 + requiredBlurArea.Inflate(Size(aBlurRadius + aSpreadRadius)); 1.354 + rect = requiredBlurArea.Intersect(rect); 1.355 + } else { 1.356 + mHasDirtyRect = false; 1.357 + } 1.358 + 1.359 + mRect = IntRect(int32_t(rect.x), int32_t(rect.y), 1.360 + int32_t(rect.width), int32_t(rect.height)); 1.361 + if (mRect.IsEmpty()) { 1.362 + return; 1.363 + } 1.364 + 1.365 + if (aSkipRect) { 1.366 + // If we get passed a skip rect, we can lower the amount of 1.367 + // blurring/spreading we need to do. We convert it to IntRect to avoid 1.368 + // expensive int<->float conversions if we were to use Rect instead. 1.369 + Rect skipRect = *aSkipRect; 1.370 + skipRect.RoundIn(); 1.371 + skipRect.Deflate(Size(aBlurRadius + aSpreadRadius)); 1.372 + mSkipRect = IntRect(int32_t(skipRect.x), int32_t(skipRect.y), 1.373 + int32_t(skipRect.width), int32_t(skipRect.height)); 1.374 + 1.375 + mSkipRect = mSkipRect.Intersect(mRect); 1.376 + if (mSkipRect.IsEqualInterior(mRect)) 1.377 + return; 1.378 + 1.379 + mSkipRect -= mRect.TopLeft(); 1.380 + } else { 1.381 + mSkipRect = IntRect(0, 0, 0, 0); 1.382 + } 1.383 + 1.384 + CheckedInt<int32_t> stride = RoundUpToMultipleOf4(mRect.width); 1.385 + if (stride.isValid()) { 1.386 + mStride = stride.value(); 1.387 + 1.388 + // We need to leave room for an additional 3 bytes for a potential overrun 1.389 + // in our blurring code. 1.390 + size_t size = BufferSizeFromStrideAndHeight(mStride, mRect.height, 3); 1.391 + if (size != 0) { 1.392 + mSurfaceAllocationSize = size; 1.393 + } 1.394 + } 1.395 +} 1.396 + 1.397 +AlphaBoxBlur::AlphaBoxBlur(const Rect& aRect, 1.398 + int32_t aStride, 1.399 + float aSigmaX, 1.400 + float aSigmaY) 1.401 + : mRect(int32_t(aRect.x), int32_t(aRect.y), 1.402 + int32_t(aRect.width), int32_t(aRect.height)), 1.403 + mSpreadRadius(), 1.404 + mBlurRadius(CalculateBlurRadius(Point(aSigmaX, aSigmaY))), 1.405 + mStride(aStride), 1.406 + mSurfaceAllocationSize(0) 1.407 +{ 1.408 + IntRect intRect; 1.409 + if (aRect.ToIntRect(&intRect)) { 1.410 + size_t minDataSize = BufferSizeFromStrideAndHeight(intRect.width, intRect.height); 1.411 + if (minDataSize != 0) { 1.412 + mSurfaceAllocationSize = minDataSize; 1.413 + } 1.414 + } 1.415 +} 1.416 + 1.417 + 1.418 +AlphaBoxBlur::~AlphaBoxBlur() 1.419 +{ 1.420 +} 1.421 + 1.422 +IntSize 1.423 +AlphaBoxBlur::GetSize() 1.424 +{ 1.425 + IntSize size(mRect.width, mRect.height); 1.426 + return size; 1.427 +} 1.428 + 1.429 +int32_t 1.430 +AlphaBoxBlur::GetStride() 1.431 +{ 1.432 + return mStride; 1.433 +} 1.434 + 1.435 +IntRect 1.436 +AlphaBoxBlur::GetRect() 1.437 +{ 1.438 + return mRect; 1.439 +} 1.440 + 1.441 +Rect* 1.442 +AlphaBoxBlur::GetDirtyRect() 1.443 +{ 1.444 + if (mHasDirtyRect) { 1.445 + return &mDirtyRect; 1.446 + } 1.447 + 1.448 + return nullptr; 1.449 +} 1.450 + 1.451 +size_t 1.452 +AlphaBoxBlur::GetSurfaceAllocationSize() const 1.453 +{ 1.454 + return mSurfaceAllocationSize; 1.455 +} 1.456 + 1.457 +void 1.458 +AlphaBoxBlur::Blur(uint8_t* aData) 1.459 +{ 1.460 + if (!aData) { 1.461 + return; 1.462 + } 1.463 + 1.464 + // no need to do all this if not blurring or spreading 1.465 + if (mBlurRadius != IntSize(0,0) || mSpreadRadius != IntSize(0,0)) { 1.466 + int32_t stride = GetStride(); 1.467 + 1.468 + IntSize size = GetSize(); 1.469 + 1.470 + if (mSpreadRadius.width > 0 || mSpreadRadius.height > 0) { 1.471 + // No need to use CheckedInt here - we have validated it in the constructor. 1.472 + size_t szB = stride * size.height; 1.473 + unsigned char* tmpData = new (std::nothrow) uint8_t[szB]; 1.474 + 1.475 + if (!tmpData) { 1.476 + return; 1.477 + } 1.478 + 1.479 + memset(tmpData, 0, szB); 1.480 + 1.481 + SpreadHorizontal(aData, tmpData, mSpreadRadius.width, GetSize().width, GetSize().height, stride, mSkipRect); 1.482 + SpreadVertical(tmpData, aData, mSpreadRadius.height, GetSize().width, GetSize().height, stride, mSkipRect); 1.483 + 1.484 + delete [] tmpData; 1.485 + } 1.486 + 1.487 + int32_t horizontalLobes[3][2]; 1.488 + ComputeLobes(mBlurRadius.width, horizontalLobes); 1.489 + int32_t verticalLobes[3][2]; 1.490 + ComputeLobes(mBlurRadius.height, verticalLobes); 1.491 + 1.492 + // We want to allow for some extra space on the left for alignment reasons. 1.493 + int32_t maxLeftLobe = RoundUpToMultipleOf4(horizontalLobes[0][0] + 1).value(); 1.494 + 1.495 + IntSize integralImageSize(size.width + maxLeftLobe + horizontalLobes[1][1], 1.496 + size.height + verticalLobes[0][0] + verticalLobes[1][1] + 1); 1.497 + 1.498 + if ((integralImageSize.width * integralImageSize.height) > (1 << 24)) { 1.499 + // Fallback to old blurring code when the surface is so large it may 1.500 + // overflow our integral image! 1.501 + 1.502 + // No need to use CheckedInt here - we have validated it in the constructor. 1.503 + size_t szB = stride * size.height; 1.504 + uint8_t* tmpData = new (std::nothrow) uint8_t[szB]; 1.505 + if (!tmpData) { 1.506 + return; 1.507 + } 1.508 + 1.509 + memset(tmpData, 0, szB); 1.510 + 1.511 + uint8_t* a = aData; 1.512 + uint8_t* b = tmpData; 1.513 + if (mBlurRadius.width > 0) { 1.514 + BoxBlurHorizontal(a, b, horizontalLobes[0][0], horizontalLobes[0][1], stride, GetSize().height, mSkipRect); 1.515 + BoxBlurHorizontal(b, a, horizontalLobes[1][0], horizontalLobes[1][1], stride, GetSize().height, mSkipRect); 1.516 + BoxBlurHorizontal(a, b, horizontalLobes[2][0], horizontalLobes[2][1], stride, GetSize().height, mSkipRect); 1.517 + } else { 1.518 + a = tmpData; 1.519 + b = aData; 1.520 + } 1.521 + // The result is in 'b' here. 1.522 + if (mBlurRadius.height > 0) { 1.523 + BoxBlurVertical(b, a, verticalLobes[0][0], verticalLobes[0][1], stride, GetSize().height, mSkipRect); 1.524 + BoxBlurVertical(a, b, verticalLobes[1][0], verticalLobes[1][1], stride, GetSize().height, mSkipRect); 1.525 + BoxBlurVertical(b, a, verticalLobes[2][0], verticalLobes[2][1], stride, GetSize().height, mSkipRect); 1.526 + } else { 1.527 + a = b; 1.528 + } 1.529 + // The result is in 'a' here. 1.530 + if (a == tmpData) { 1.531 + memcpy(aData, tmpData, szB); 1.532 + } 1.533 + delete [] tmpData; 1.534 + } else { 1.535 + size_t integralImageStride = GetAlignedStride<16>(integralImageSize.width * 4); 1.536 + 1.537 + // We need to leave room for an additional 12 bytes for a maximum overrun 1.538 + // of 3 pixels in the blurring code. 1.539 + size_t bufLen = BufferSizeFromStrideAndHeight(integralImageStride, integralImageSize.height, 12); 1.540 + if (bufLen == 0) { 1.541 + return; 1.542 + } 1.543 + // bufLen is a byte count, but here we want a multiple of 32-bit ints, so 1.544 + // we divide by 4. 1.545 + AlignedArray<uint32_t> integralImage((bufLen / 4) + ((bufLen % 4) ? 1 : 0)); 1.546 + 1.547 + if (!integralImage) { 1.548 + return; 1.549 + } 1.550 +#ifdef USE_SSE2 1.551 + if (Factory::HasSSE2()) { 1.552 + BoxBlur_SSE2(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0], 1.553 + verticalLobes[0][1], integralImage, integralImageStride); 1.554 + BoxBlur_SSE2(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0], 1.555 + verticalLobes[1][1], integralImage, integralImageStride); 1.556 + BoxBlur_SSE2(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0], 1.557 + verticalLobes[2][1], integralImage, integralImageStride); 1.558 + } else 1.559 +#endif 1.560 + { 1.561 + BoxBlur_C(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0], 1.562 + verticalLobes[0][1], integralImage, integralImageStride); 1.563 + BoxBlur_C(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0], 1.564 + verticalLobes[1][1], integralImage, integralImageStride); 1.565 + BoxBlur_C(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0], 1.566 + verticalLobes[2][1], integralImage, integralImageStride); 1.567 + } 1.568 + } 1.569 + } 1.570 +} 1.571 + 1.572 +MOZ_ALWAYS_INLINE void 1.573 +GenerateIntegralRow(uint32_t *aDest, const uint8_t *aSource, uint32_t *aPreviousRow, 1.574 + const uint32_t &aSourceWidth, const uint32_t &aLeftInflation, const uint32_t &aRightInflation) 1.575 +{ 1.576 + uint32_t currentRowSum = 0; 1.577 + uint32_t pixel = aSource[0]; 1.578 + for (uint32_t x = 0; x < aLeftInflation; x++) { 1.579 + currentRowSum += pixel; 1.580 + *aDest++ = currentRowSum + *aPreviousRow++; 1.581 + } 1.582 + for (uint32_t x = aLeftInflation; x < (aSourceWidth + aLeftInflation); x += 4) { 1.583 + uint32_t alphaValues = *(uint32_t*)(aSource + (x - aLeftInflation)); 1.584 +#if defined WORDS_BIGENDIAN || defined IS_BIG_ENDIAN || defined __BIG_ENDIAN__ 1.585 + currentRowSum += (alphaValues >> 24) & 0xff; 1.586 + *aDest++ = *aPreviousRow++ + currentRowSum; 1.587 + currentRowSum += (alphaValues >> 16) & 0xff; 1.588 + *aDest++ = *aPreviousRow++ + currentRowSum; 1.589 + currentRowSum += (alphaValues >> 8) & 0xff; 1.590 + *aDest++ = *aPreviousRow++ + currentRowSum; 1.591 + currentRowSum += alphaValues & 0xff; 1.592 + *aDest++ = *aPreviousRow++ + currentRowSum; 1.593 +#else 1.594 + currentRowSum += alphaValues & 0xff; 1.595 + *aDest++ = *aPreviousRow++ + currentRowSum; 1.596 + alphaValues >>= 8; 1.597 + currentRowSum += alphaValues & 0xff; 1.598 + *aDest++ = *aPreviousRow++ + currentRowSum; 1.599 + alphaValues >>= 8; 1.600 + currentRowSum += alphaValues & 0xff; 1.601 + *aDest++ = *aPreviousRow++ + currentRowSum; 1.602 + alphaValues >>= 8; 1.603 + currentRowSum += alphaValues & 0xff; 1.604 + *aDest++ = *aPreviousRow++ + currentRowSum; 1.605 +#endif 1.606 + } 1.607 + pixel = aSource[aSourceWidth - 1]; 1.608 + for (uint32_t x = (aSourceWidth + aLeftInflation); x < (aSourceWidth + aLeftInflation + aRightInflation); x++) { 1.609 + currentRowSum += pixel; 1.610 + *aDest++ = currentRowSum + *aPreviousRow++; 1.611 + } 1.612 +} 1.613 + 1.614 +MOZ_ALWAYS_INLINE void 1.615 +GenerateIntegralImage_C(int32_t aLeftInflation, int32_t aRightInflation, 1.616 + int32_t aTopInflation, int32_t aBottomInflation, 1.617 + uint32_t *aIntegralImage, size_t aIntegralImageStride, 1.618 + uint8_t *aSource, int32_t aSourceStride, const IntSize &aSize) 1.619 +{ 1.620 + uint32_t stride32bit = aIntegralImageStride / 4; 1.621 + 1.622 + IntSize integralImageSize(aSize.width + aLeftInflation + aRightInflation, 1.623 + aSize.height + aTopInflation + aBottomInflation); 1.624 + 1.625 + memset(aIntegralImage, 0, aIntegralImageStride); 1.626 + 1.627 + GenerateIntegralRow(aIntegralImage, aSource, aIntegralImage, 1.628 + aSize.width, aLeftInflation, aRightInflation); 1.629 + for (int y = 1; y < aTopInflation + 1; y++) { 1.630 + GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource, aIntegralImage + (y - 1) * stride32bit, 1.631 + aSize.width, aLeftInflation, aRightInflation); 1.632 + } 1.633 + 1.634 + for (int y = aTopInflation + 1; y < (aSize.height + aTopInflation); y++) { 1.635 + GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource + aSourceStride * (y - aTopInflation), 1.636 + aIntegralImage + (y - 1) * stride32bit, aSize.width, aLeftInflation, aRightInflation); 1.637 + } 1.638 + 1.639 + if (aBottomInflation) { 1.640 + for (int y = (aSize.height + aTopInflation); y < integralImageSize.height; y++) { 1.641 + GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource + ((aSize.height - 1) * aSourceStride), 1.642 + aIntegralImage + (y - 1) * stride32bit, 1.643 + aSize.width, aLeftInflation, aRightInflation); 1.644 + } 1.645 + } 1.646 +} 1.647 + 1.648 +/** 1.649 + * Attempt to do an in-place box blur using an integral image. 1.650 + */ 1.651 +void 1.652 +AlphaBoxBlur::BoxBlur_C(uint8_t* aData, 1.653 + int32_t aLeftLobe, 1.654 + int32_t aRightLobe, 1.655 + int32_t aTopLobe, 1.656 + int32_t aBottomLobe, 1.657 + uint32_t *aIntegralImage, 1.658 + size_t aIntegralImageStride) 1.659 +{ 1.660 + IntSize size = GetSize(); 1.661 + 1.662 + MOZ_ASSERT(size.width > 0); 1.663 + 1.664 + // Our 'left' or 'top' lobe will include the current pixel. i.e. when 1.665 + // looking at an integral image the value of a pixel at 'x,y' is calculated 1.666 + // using the value of the integral image values above/below that. 1.667 + aLeftLobe++; 1.668 + aTopLobe++; 1.669 + int32_t boxSize = (aLeftLobe + aRightLobe) * (aTopLobe + aBottomLobe); 1.670 + 1.671 + MOZ_ASSERT(boxSize > 0); 1.672 + 1.673 + if (boxSize == 1) { 1.674 + return; 1.675 + } 1.676 + 1.677 + int32_t stride32bit = aIntegralImageStride / 4; 1.678 + 1.679 + int32_t leftInflation = RoundUpToMultipleOf4(aLeftLobe).value(); 1.680 + 1.681 + GenerateIntegralImage_C(leftInflation, aRightLobe, aTopLobe, aBottomLobe, 1.682 + aIntegralImage, aIntegralImageStride, aData, 1.683 + mStride, size); 1.684 + 1.685 + uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize); 1.686 + 1.687 + uint32_t *innerIntegral = aIntegralImage + (aTopLobe * stride32bit) + leftInflation; 1.688 + 1.689 + // Storing these locally makes this about 30% faster! Presumably the compiler 1.690 + // can't be sure we're not altering the member variables in this loop. 1.691 + IntRect skipRect = mSkipRect; 1.692 + uint8_t *data = aData; 1.693 + int32_t stride = mStride; 1.694 + for (int32_t y = 0; y < size.height; y++) { 1.695 + bool inSkipRectY = y > skipRect.y && y < skipRect.YMost(); 1.696 + 1.697 + uint32_t *topLeftBase = innerIntegral + ((y - aTopLobe) * stride32bit - aLeftLobe); 1.698 + uint32_t *topRightBase = innerIntegral + ((y - aTopLobe) * stride32bit + aRightLobe); 1.699 + uint32_t *bottomRightBase = innerIntegral + ((y + aBottomLobe) * stride32bit + aRightLobe); 1.700 + uint32_t *bottomLeftBase = innerIntegral + ((y + aBottomLobe) * stride32bit - aLeftLobe); 1.701 + 1.702 + for (int32_t x = 0; x < size.width; x++) { 1.703 + if (inSkipRectY && x > skipRect.x && x < skipRect.XMost()) { 1.704 + x = skipRect.XMost() - 1; 1.705 + // Trigger early jump on coming loop iterations, this will be reset 1.706 + // next line anyway. 1.707 + inSkipRectY = false; 1.708 + continue; 1.709 + } 1.710 + int32_t topLeft = topLeftBase[x]; 1.711 + int32_t topRight = topRightBase[x]; 1.712 + int32_t bottomRight = bottomRightBase[x]; 1.713 + int32_t bottomLeft = bottomLeftBase[x]; 1.714 + 1.715 + uint32_t value = bottomRight - topRight - bottomLeft; 1.716 + value += topLeft; 1.717 + 1.718 + data[stride * y + x] = (uint64_t(reciprocal) * value + (uint64_t(1) << 31)) >> 32; 1.719 + } 1.720 + } 1.721 +} 1.722 + 1.723 +/** 1.724 + * Compute the box blur size (which we're calling the blur radius) from 1.725 + * the standard deviation. 1.726 + * 1.727 + * Much of this, the 3 * sqrt(2 * pi) / 4, is the known value for 1.728 + * approximating a Gaussian using box blurs. This yields quite a good 1.729 + * approximation for a Gaussian. Then we multiply this by 1.5 since our 1.730 + * code wants the radius of the entire triple-box-blur kernel instead of 1.731 + * the diameter of an individual box blur. For more details, see: 1.732 + * http://www.w3.org/TR/SVG11/filters.html#feGaussianBlurElement 1.733 + * https://bugzilla.mozilla.org/show_bug.cgi?id=590039#c19 1.734 + */ 1.735 +static const Float GAUSSIAN_SCALE_FACTOR = Float((3 * sqrt(2 * M_PI) / 4) * 1.5); 1.736 + 1.737 +IntSize 1.738 +AlphaBoxBlur::CalculateBlurRadius(const Point& aStd) 1.739 +{ 1.740 + IntSize size(static_cast<int32_t>(floor(aStd.x * GAUSSIAN_SCALE_FACTOR + 0.5)), 1.741 + static_cast<int32_t>(floor(aStd.y * GAUSSIAN_SCALE_FACTOR + 0.5))); 1.742 + 1.743 + return size; 1.744 +} 1.745 + 1.746 +} 1.747 +}