michael@0: /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* vim: set ts=8 sts=2 et sw=2 tw=80: */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "Blur.h" michael@0: michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #include "mozilla/CheckedInt.h" michael@0: #include "mozilla/Constants.h" michael@0: michael@0: #include "2D.h" michael@0: #include "DataSurfaceHelpers.h" michael@0: #include "Tools.h" michael@0: michael@0: using namespace std; michael@0: michael@0: namespace mozilla { michael@0: namespace gfx { michael@0: michael@0: /** michael@0: * Box blur involves looking at one pixel, and setting its value to the average michael@0: * of its neighbouring pixels. michael@0: * @param aInput The input buffer. michael@0: * @param aOutput The output buffer. michael@0: * @param aLeftLobe The number of pixels to blend on the left. michael@0: * @param aRightLobe The number of pixels to blend on the right. michael@0: * @param aWidth The number of columns in the buffers. michael@0: * @param aRows The number of rows in the buffers. michael@0: * @param aSkipRect An area to skip blurring in. michael@0: * XXX shouldn't we pass stride in separately here? michael@0: */ michael@0: static void michael@0: BoxBlurHorizontal(unsigned char* aInput, michael@0: unsigned char* aOutput, michael@0: int32_t aLeftLobe, michael@0: int32_t aRightLobe, michael@0: int32_t aWidth, michael@0: int32_t aRows, michael@0: const IntRect& aSkipRect) michael@0: { michael@0: MOZ_ASSERT(aWidth > 0); michael@0: michael@0: int32_t boxSize = aLeftLobe + aRightLobe + 1; michael@0: bool skipRectCoversWholeRow = 0 >= aSkipRect.x && michael@0: aWidth <= aSkipRect.XMost(); michael@0: if (boxSize == 1) { michael@0: memcpy(aOutput, aInput, aWidth*aRows); michael@0: return; michael@0: } michael@0: uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize); michael@0: michael@0: for (int32_t y = 0; y < aRows; y++) { michael@0: // Check whether the skip rect intersects this row. If the skip michael@0: // rect covers the whole surface in this row, we can avoid michael@0: // this row entirely (and any others along the skip rect). michael@0: bool inSkipRectY = y >= aSkipRect.y && michael@0: y < aSkipRect.YMost(); michael@0: if (inSkipRectY && skipRectCoversWholeRow) { michael@0: y = aSkipRect.YMost() - 1; michael@0: continue; michael@0: } michael@0: michael@0: uint32_t alphaSum = 0; michael@0: for (int32_t i = 0; i < boxSize; i++) { michael@0: int32_t pos = i - aLeftLobe; michael@0: // See assertion above; if aWidth is zero, then we would have no michael@0: // valid position to clamp to. michael@0: pos = max(pos, 0); michael@0: pos = min(pos, aWidth - 1); michael@0: alphaSum += aInput[aWidth * y + pos]; michael@0: } michael@0: for (int32_t x = 0; x < aWidth; x++) { michael@0: // Check whether we are within the skip rect. If so, go michael@0: // to the next point outside the skip rect. michael@0: if (inSkipRectY && x >= aSkipRect.x && michael@0: x < aSkipRect.XMost()) { michael@0: x = aSkipRect.XMost(); michael@0: if (x >= aWidth) michael@0: break; michael@0: michael@0: // Recalculate the neighbouring alpha values for michael@0: // our new point on the surface. michael@0: alphaSum = 0; michael@0: for (int32_t i = 0; i < boxSize; i++) { michael@0: int32_t pos = x + i - aLeftLobe; michael@0: // See assertion above; if aWidth is zero, then we would have no michael@0: // valid position to clamp to. michael@0: pos = max(pos, 0); michael@0: pos = min(pos, aWidth - 1); michael@0: alphaSum += aInput[aWidth * y + pos]; michael@0: } michael@0: } michael@0: int32_t tmp = x - aLeftLobe; michael@0: int32_t last = max(tmp, 0); michael@0: int32_t next = min(tmp + boxSize, aWidth - 1); michael@0: michael@0: aOutput[aWidth * y + x] = (uint64_t(alphaSum) * reciprocal) >> 32; michael@0: michael@0: alphaSum += aInput[aWidth * y + next] - michael@0: aInput[aWidth * y + last]; michael@0: } michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Identical to BoxBlurHorizontal, except it blurs top and bottom instead of michael@0: * left and right. michael@0: * XXX shouldn't we pass stride in separately here? michael@0: */ michael@0: static void michael@0: BoxBlurVertical(unsigned char* aInput, michael@0: unsigned char* aOutput, michael@0: int32_t aTopLobe, michael@0: int32_t aBottomLobe, michael@0: int32_t aWidth, michael@0: int32_t aRows, michael@0: const IntRect& aSkipRect) michael@0: { michael@0: MOZ_ASSERT(aRows > 0); michael@0: michael@0: int32_t boxSize = aTopLobe + aBottomLobe + 1; michael@0: bool skipRectCoversWholeColumn = 0 >= aSkipRect.y && michael@0: aRows <= aSkipRect.YMost(); michael@0: if (boxSize == 1) { michael@0: memcpy(aOutput, aInput, aWidth*aRows); michael@0: return; michael@0: } michael@0: uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize); michael@0: michael@0: for (int32_t x = 0; x < aWidth; x++) { michael@0: bool inSkipRectX = x >= aSkipRect.x && michael@0: x < aSkipRect.XMost(); michael@0: if (inSkipRectX && skipRectCoversWholeColumn) { michael@0: x = aSkipRect.XMost() - 1; michael@0: continue; michael@0: } michael@0: michael@0: uint32_t alphaSum = 0; michael@0: for (int32_t i = 0; i < boxSize; i++) { michael@0: int32_t pos = i - aTopLobe; michael@0: // See assertion above; if aRows is zero, then we would have no michael@0: // valid position to clamp to. michael@0: pos = max(pos, 0); michael@0: pos = min(pos, aRows - 1); michael@0: alphaSum += aInput[aWidth * pos + x]; michael@0: } michael@0: for (int32_t y = 0; y < aRows; y++) { michael@0: if (inSkipRectX && y >= aSkipRect.y && michael@0: y < aSkipRect.YMost()) { michael@0: y = aSkipRect.YMost(); michael@0: if (y >= aRows) michael@0: break; michael@0: michael@0: alphaSum = 0; michael@0: for (int32_t i = 0; i < boxSize; i++) { michael@0: int32_t pos = y + i - aTopLobe; michael@0: // See assertion above; if aRows is zero, then we would have no michael@0: // valid position to clamp to. michael@0: pos = max(pos, 0); michael@0: pos = min(pos, aRows - 1); michael@0: alphaSum += aInput[aWidth * pos + x]; michael@0: } michael@0: } michael@0: int32_t tmp = y - aTopLobe; michael@0: int32_t last = max(tmp, 0); michael@0: int32_t next = min(tmp + boxSize, aRows - 1); michael@0: michael@0: aOutput[aWidth * y + x] = (uint64_t(alphaSum) * reciprocal) >> 32; michael@0: michael@0: alphaSum += aInput[aWidth * next + x] - michael@0: aInput[aWidth * last + x]; michael@0: } michael@0: } michael@0: } michael@0: michael@0: static void ComputeLobes(int32_t aRadius, int32_t aLobes[3][2]) michael@0: { michael@0: int32_t major, minor, final; michael@0: michael@0: /* See http://www.w3.org/TR/SVG/filters.html#feGaussianBlur for michael@0: * some notes about approximating the Gaussian blur with box-blurs. michael@0: * The comments below are in the terminology of that page. michael@0: */ michael@0: int32_t z = aRadius / 3; michael@0: switch (aRadius % 3) { michael@0: case 0: michael@0: // aRadius = z*3; choose d = 2*z + 1 michael@0: major = minor = final = z; michael@0: break; michael@0: case 1: michael@0: // aRadius = z*3 + 1 michael@0: // This is a tricky case since there is no value of d which will michael@0: // yield a radius of exactly aRadius. If d is odd, i.e. d=2*k + 1 michael@0: // for some integer k, then the radius will be 3*k. If d is even, michael@0: // i.e. d=2*k, then the radius will be 3*k - 1. michael@0: // So we have to choose values that don't match the standard michael@0: // algorithm. michael@0: major = z + 1; michael@0: minor = final = z; michael@0: break; michael@0: case 2: michael@0: // aRadius = z*3 + 2; choose d = 2*z + 2 michael@0: major = final = z + 1; michael@0: minor = z; michael@0: break; michael@0: default: michael@0: // Mathematical impossibility! michael@0: MOZ_ASSERT(false); michael@0: major = minor = final = 0; michael@0: } michael@0: MOZ_ASSERT(major + minor + final == aRadius); michael@0: michael@0: aLobes[0][0] = major; michael@0: aLobes[0][1] = minor; michael@0: aLobes[1][0] = minor; michael@0: aLobes[1][1] = major; michael@0: aLobes[2][0] = final; michael@0: aLobes[2][1] = final; michael@0: } michael@0: michael@0: static void michael@0: SpreadHorizontal(unsigned char* aInput, michael@0: unsigned char* aOutput, michael@0: int32_t aRadius, michael@0: int32_t aWidth, michael@0: int32_t aRows, michael@0: int32_t aStride, michael@0: const IntRect& aSkipRect) michael@0: { michael@0: if (aRadius == 0) { michael@0: memcpy(aOutput, aInput, aStride * aRows); michael@0: return; michael@0: } michael@0: michael@0: bool skipRectCoversWholeRow = 0 >= aSkipRect.x && michael@0: aWidth <= aSkipRect.XMost(); michael@0: for (int32_t y = 0; y < aRows; y++) { michael@0: // Check whether the skip rect intersects this row. If the skip michael@0: // rect covers the whole surface in this row, we can avoid michael@0: // this row entirely (and any others along the skip rect). michael@0: bool inSkipRectY = y >= aSkipRect.y && michael@0: y < aSkipRect.YMost(); michael@0: if (inSkipRectY && skipRectCoversWholeRow) { michael@0: y = aSkipRect.YMost() - 1; michael@0: continue; michael@0: } michael@0: michael@0: for (int32_t x = 0; x < aWidth; x++) { michael@0: // Check whether we are within the skip rect. If so, go michael@0: // to the next point outside the skip rect. michael@0: if (inSkipRectY && x >= aSkipRect.x && michael@0: x < aSkipRect.XMost()) { michael@0: x = aSkipRect.XMost(); michael@0: if (x >= aWidth) michael@0: break; michael@0: } michael@0: michael@0: int32_t sMin = max(x - aRadius, 0); michael@0: int32_t sMax = min(x + aRadius, aWidth - 1); michael@0: int32_t v = 0; michael@0: for (int32_t s = sMin; s <= sMax; ++s) { michael@0: v = max(v, aInput[aStride * y + s]); michael@0: } michael@0: aOutput[aStride * y + x] = v; michael@0: } michael@0: } michael@0: } michael@0: michael@0: static void michael@0: SpreadVertical(unsigned char* aInput, michael@0: unsigned char* aOutput, michael@0: int32_t aRadius, michael@0: int32_t aWidth, michael@0: int32_t aRows, michael@0: int32_t aStride, michael@0: const IntRect& aSkipRect) michael@0: { michael@0: if (aRadius == 0) { michael@0: memcpy(aOutput, aInput, aStride * aRows); michael@0: return; michael@0: } michael@0: michael@0: bool skipRectCoversWholeColumn = 0 >= aSkipRect.y && michael@0: aRows <= aSkipRect.YMost(); michael@0: for (int32_t x = 0; x < aWidth; x++) { michael@0: bool inSkipRectX = x >= aSkipRect.x && michael@0: x < aSkipRect.XMost(); michael@0: if (inSkipRectX && skipRectCoversWholeColumn) { michael@0: x = aSkipRect.XMost() - 1; michael@0: continue; michael@0: } michael@0: michael@0: for (int32_t y = 0; y < aRows; y++) { michael@0: // Check whether we are within the skip rect. If so, go michael@0: // to the next point outside the skip rect. michael@0: if (inSkipRectX && y >= aSkipRect.y && michael@0: y < aSkipRect.YMost()) { michael@0: y = aSkipRect.YMost(); michael@0: if (y >= aRows) michael@0: break; michael@0: } michael@0: michael@0: int32_t sMin = max(y - aRadius, 0); michael@0: int32_t sMax = min(y + aRadius, aRows - 1); michael@0: int32_t v = 0; michael@0: for (int32_t s = sMin; s <= sMax; ++s) { michael@0: v = max(v, aInput[aStride * s + x]); michael@0: } michael@0: aOutput[aStride * y + x] = v; michael@0: } michael@0: } michael@0: } michael@0: michael@0: CheckedInt michael@0: AlphaBoxBlur::RoundUpToMultipleOf4(int32_t aVal) michael@0: { michael@0: CheckedInt val(aVal); michael@0: michael@0: val += 3; michael@0: val /= 4; michael@0: val *= 4; michael@0: michael@0: return val; michael@0: } michael@0: michael@0: AlphaBoxBlur::AlphaBoxBlur(const Rect& aRect, michael@0: const IntSize& aSpreadRadius, michael@0: const IntSize& aBlurRadius, michael@0: const Rect* aDirtyRect, michael@0: const Rect* aSkipRect) michael@0: : mSpreadRadius(aSpreadRadius), michael@0: mBlurRadius(aBlurRadius), michael@0: mSurfaceAllocationSize(0) michael@0: { michael@0: Rect rect(aRect); michael@0: rect.Inflate(Size(aBlurRadius + aSpreadRadius)); michael@0: rect.RoundOut(); michael@0: michael@0: if (aDirtyRect) { michael@0: // If we get passed a dirty rect from layout, we can minimize the michael@0: // shadow size and make painting faster. michael@0: mHasDirtyRect = true; michael@0: mDirtyRect = *aDirtyRect; michael@0: Rect requiredBlurArea = mDirtyRect.Intersect(rect); michael@0: requiredBlurArea.Inflate(Size(aBlurRadius + aSpreadRadius)); michael@0: rect = requiredBlurArea.Intersect(rect); michael@0: } else { michael@0: mHasDirtyRect = false; michael@0: } michael@0: michael@0: mRect = IntRect(int32_t(rect.x), int32_t(rect.y), michael@0: int32_t(rect.width), int32_t(rect.height)); michael@0: if (mRect.IsEmpty()) { michael@0: return; michael@0: } michael@0: michael@0: if (aSkipRect) { michael@0: // If we get passed a skip rect, we can lower the amount of michael@0: // blurring/spreading we need to do. We convert it to IntRect to avoid michael@0: // expensive int<->float conversions if we were to use Rect instead. michael@0: Rect skipRect = *aSkipRect; michael@0: skipRect.RoundIn(); michael@0: skipRect.Deflate(Size(aBlurRadius + aSpreadRadius)); michael@0: mSkipRect = IntRect(int32_t(skipRect.x), int32_t(skipRect.y), michael@0: int32_t(skipRect.width), int32_t(skipRect.height)); michael@0: michael@0: mSkipRect = mSkipRect.Intersect(mRect); michael@0: if (mSkipRect.IsEqualInterior(mRect)) michael@0: return; michael@0: michael@0: mSkipRect -= mRect.TopLeft(); michael@0: } else { michael@0: mSkipRect = IntRect(0, 0, 0, 0); michael@0: } michael@0: michael@0: CheckedInt stride = RoundUpToMultipleOf4(mRect.width); michael@0: if (stride.isValid()) { michael@0: mStride = stride.value(); michael@0: michael@0: // We need to leave room for an additional 3 bytes for a potential overrun michael@0: // in our blurring code. michael@0: size_t size = BufferSizeFromStrideAndHeight(mStride, mRect.height, 3); michael@0: if (size != 0) { michael@0: mSurfaceAllocationSize = size; michael@0: } michael@0: } michael@0: } michael@0: michael@0: AlphaBoxBlur::AlphaBoxBlur(const Rect& aRect, michael@0: int32_t aStride, michael@0: float aSigmaX, michael@0: float aSigmaY) michael@0: : mRect(int32_t(aRect.x), int32_t(aRect.y), michael@0: int32_t(aRect.width), int32_t(aRect.height)), michael@0: mSpreadRadius(), michael@0: mBlurRadius(CalculateBlurRadius(Point(aSigmaX, aSigmaY))), michael@0: mStride(aStride), michael@0: mSurfaceAllocationSize(0) michael@0: { michael@0: IntRect intRect; michael@0: if (aRect.ToIntRect(&intRect)) { michael@0: size_t minDataSize = BufferSizeFromStrideAndHeight(intRect.width, intRect.height); michael@0: if (minDataSize != 0) { michael@0: mSurfaceAllocationSize = minDataSize; michael@0: } michael@0: } michael@0: } michael@0: michael@0: michael@0: AlphaBoxBlur::~AlphaBoxBlur() michael@0: { michael@0: } michael@0: michael@0: IntSize michael@0: AlphaBoxBlur::GetSize() michael@0: { michael@0: IntSize size(mRect.width, mRect.height); michael@0: return size; michael@0: } michael@0: michael@0: int32_t michael@0: AlphaBoxBlur::GetStride() michael@0: { michael@0: return mStride; michael@0: } michael@0: michael@0: IntRect michael@0: AlphaBoxBlur::GetRect() michael@0: { michael@0: return mRect; michael@0: } michael@0: michael@0: Rect* michael@0: AlphaBoxBlur::GetDirtyRect() michael@0: { michael@0: if (mHasDirtyRect) { michael@0: return &mDirtyRect; michael@0: } michael@0: michael@0: return nullptr; michael@0: } michael@0: michael@0: size_t michael@0: AlphaBoxBlur::GetSurfaceAllocationSize() const michael@0: { michael@0: return mSurfaceAllocationSize; michael@0: } michael@0: michael@0: void michael@0: AlphaBoxBlur::Blur(uint8_t* aData) michael@0: { michael@0: if (!aData) { michael@0: return; michael@0: } michael@0: michael@0: // no need to do all this if not blurring or spreading michael@0: if (mBlurRadius != IntSize(0,0) || mSpreadRadius != IntSize(0,0)) { michael@0: int32_t stride = GetStride(); michael@0: michael@0: IntSize size = GetSize(); michael@0: michael@0: if (mSpreadRadius.width > 0 || mSpreadRadius.height > 0) { michael@0: // No need to use CheckedInt here - we have validated it in the constructor. michael@0: size_t szB = stride * size.height; michael@0: unsigned char* tmpData = new (std::nothrow) uint8_t[szB]; michael@0: michael@0: if (!tmpData) { michael@0: return; michael@0: } michael@0: michael@0: memset(tmpData, 0, szB); michael@0: michael@0: SpreadHorizontal(aData, tmpData, mSpreadRadius.width, GetSize().width, GetSize().height, stride, mSkipRect); michael@0: SpreadVertical(tmpData, aData, mSpreadRadius.height, GetSize().width, GetSize().height, stride, mSkipRect); michael@0: michael@0: delete [] tmpData; michael@0: } michael@0: michael@0: int32_t horizontalLobes[3][2]; michael@0: ComputeLobes(mBlurRadius.width, horizontalLobes); michael@0: int32_t verticalLobes[3][2]; michael@0: ComputeLobes(mBlurRadius.height, verticalLobes); michael@0: michael@0: // We want to allow for some extra space on the left for alignment reasons. michael@0: int32_t maxLeftLobe = RoundUpToMultipleOf4(horizontalLobes[0][0] + 1).value(); michael@0: michael@0: IntSize integralImageSize(size.width + maxLeftLobe + horizontalLobes[1][1], michael@0: size.height + verticalLobes[0][0] + verticalLobes[1][1] + 1); michael@0: michael@0: if ((integralImageSize.width * integralImageSize.height) > (1 << 24)) { michael@0: // Fallback to old blurring code when the surface is so large it may michael@0: // overflow our integral image! michael@0: michael@0: // No need to use CheckedInt here - we have validated it in the constructor. michael@0: size_t szB = stride * size.height; michael@0: uint8_t* tmpData = new (std::nothrow) uint8_t[szB]; michael@0: if (!tmpData) { michael@0: return; michael@0: } michael@0: michael@0: memset(tmpData, 0, szB); michael@0: michael@0: uint8_t* a = aData; michael@0: uint8_t* b = tmpData; michael@0: if (mBlurRadius.width > 0) { michael@0: BoxBlurHorizontal(a, b, horizontalLobes[0][0], horizontalLobes[0][1], stride, GetSize().height, mSkipRect); michael@0: BoxBlurHorizontal(b, a, horizontalLobes[1][0], horizontalLobes[1][1], stride, GetSize().height, mSkipRect); michael@0: BoxBlurHorizontal(a, b, horizontalLobes[2][0], horizontalLobes[2][1], stride, GetSize().height, mSkipRect); michael@0: } else { michael@0: a = tmpData; michael@0: b = aData; michael@0: } michael@0: // The result is in 'b' here. michael@0: if (mBlurRadius.height > 0) { michael@0: BoxBlurVertical(b, a, verticalLobes[0][0], verticalLobes[0][1], stride, GetSize().height, mSkipRect); michael@0: BoxBlurVertical(a, b, verticalLobes[1][0], verticalLobes[1][1], stride, GetSize().height, mSkipRect); michael@0: BoxBlurVertical(b, a, verticalLobes[2][0], verticalLobes[2][1], stride, GetSize().height, mSkipRect); michael@0: } else { michael@0: a = b; michael@0: } michael@0: // The result is in 'a' here. michael@0: if (a == tmpData) { michael@0: memcpy(aData, tmpData, szB); michael@0: } michael@0: delete [] tmpData; michael@0: } else { michael@0: size_t integralImageStride = GetAlignedStride<16>(integralImageSize.width * 4); michael@0: michael@0: // We need to leave room for an additional 12 bytes for a maximum overrun michael@0: // of 3 pixels in the blurring code. michael@0: size_t bufLen = BufferSizeFromStrideAndHeight(integralImageStride, integralImageSize.height, 12); michael@0: if (bufLen == 0) { michael@0: return; michael@0: } michael@0: // bufLen is a byte count, but here we want a multiple of 32-bit ints, so michael@0: // we divide by 4. michael@0: AlignedArray integralImage((bufLen / 4) + ((bufLen % 4) ? 1 : 0)); michael@0: michael@0: if (!integralImage) { michael@0: return; michael@0: } michael@0: #ifdef USE_SSE2 michael@0: if (Factory::HasSSE2()) { michael@0: BoxBlur_SSE2(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0], michael@0: verticalLobes[0][1], integralImage, integralImageStride); michael@0: BoxBlur_SSE2(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0], michael@0: verticalLobes[1][1], integralImage, integralImageStride); michael@0: BoxBlur_SSE2(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0], michael@0: verticalLobes[2][1], integralImage, integralImageStride); michael@0: } else michael@0: #endif michael@0: { michael@0: BoxBlur_C(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0], michael@0: verticalLobes[0][1], integralImage, integralImageStride); michael@0: BoxBlur_C(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0], michael@0: verticalLobes[1][1], integralImage, integralImageStride); michael@0: BoxBlur_C(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0], michael@0: verticalLobes[2][1], integralImage, integralImageStride); michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE void michael@0: GenerateIntegralRow(uint32_t *aDest, const uint8_t *aSource, uint32_t *aPreviousRow, michael@0: const uint32_t &aSourceWidth, const uint32_t &aLeftInflation, const uint32_t &aRightInflation) michael@0: { michael@0: uint32_t currentRowSum = 0; michael@0: uint32_t pixel = aSource[0]; michael@0: for (uint32_t x = 0; x < aLeftInflation; x++) { michael@0: currentRowSum += pixel; michael@0: *aDest++ = currentRowSum + *aPreviousRow++; michael@0: } michael@0: for (uint32_t x = aLeftInflation; x < (aSourceWidth + aLeftInflation); x += 4) { michael@0: uint32_t alphaValues = *(uint32_t*)(aSource + (x - aLeftInflation)); michael@0: #if defined WORDS_BIGENDIAN || defined IS_BIG_ENDIAN || defined __BIG_ENDIAN__ michael@0: currentRowSum += (alphaValues >> 24) & 0xff; michael@0: *aDest++ = *aPreviousRow++ + currentRowSum; michael@0: currentRowSum += (alphaValues >> 16) & 0xff; michael@0: *aDest++ = *aPreviousRow++ + currentRowSum; michael@0: currentRowSum += (alphaValues >> 8) & 0xff; michael@0: *aDest++ = *aPreviousRow++ + currentRowSum; michael@0: currentRowSum += alphaValues & 0xff; michael@0: *aDest++ = *aPreviousRow++ + currentRowSum; michael@0: #else michael@0: currentRowSum += alphaValues & 0xff; michael@0: *aDest++ = *aPreviousRow++ + currentRowSum; michael@0: alphaValues >>= 8; michael@0: currentRowSum += alphaValues & 0xff; michael@0: *aDest++ = *aPreviousRow++ + currentRowSum; michael@0: alphaValues >>= 8; michael@0: currentRowSum += alphaValues & 0xff; michael@0: *aDest++ = *aPreviousRow++ + currentRowSum; michael@0: alphaValues >>= 8; michael@0: currentRowSum += alphaValues & 0xff; michael@0: *aDest++ = *aPreviousRow++ + currentRowSum; michael@0: #endif michael@0: } michael@0: pixel = aSource[aSourceWidth - 1]; michael@0: for (uint32_t x = (aSourceWidth + aLeftInflation); x < (aSourceWidth + aLeftInflation + aRightInflation); x++) { michael@0: currentRowSum += pixel; michael@0: *aDest++ = currentRowSum + *aPreviousRow++; michael@0: } michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE void michael@0: GenerateIntegralImage_C(int32_t aLeftInflation, int32_t aRightInflation, michael@0: int32_t aTopInflation, int32_t aBottomInflation, michael@0: uint32_t *aIntegralImage, size_t aIntegralImageStride, michael@0: uint8_t *aSource, int32_t aSourceStride, const IntSize &aSize) michael@0: { michael@0: uint32_t stride32bit = aIntegralImageStride / 4; michael@0: michael@0: IntSize integralImageSize(aSize.width + aLeftInflation + aRightInflation, michael@0: aSize.height + aTopInflation + aBottomInflation); michael@0: michael@0: memset(aIntegralImage, 0, aIntegralImageStride); michael@0: michael@0: GenerateIntegralRow(aIntegralImage, aSource, aIntegralImage, michael@0: aSize.width, aLeftInflation, aRightInflation); michael@0: for (int y = 1; y < aTopInflation + 1; y++) { michael@0: GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource, aIntegralImage + (y - 1) * stride32bit, michael@0: aSize.width, aLeftInflation, aRightInflation); michael@0: } michael@0: michael@0: for (int y = aTopInflation + 1; y < (aSize.height + aTopInflation); y++) { michael@0: GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource + aSourceStride * (y - aTopInflation), michael@0: aIntegralImage + (y - 1) * stride32bit, aSize.width, aLeftInflation, aRightInflation); michael@0: } michael@0: michael@0: if (aBottomInflation) { michael@0: for (int y = (aSize.height + aTopInflation); y < integralImageSize.height; y++) { michael@0: GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource + ((aSize.height - 1) * aSourceStride), michael@0: aIntegralImage + (y - 1) * stride32bit, michael@0: aSize.width, aLeftInflation, aRightInflation); michael@0: } michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Attempt to do an in-place box blur using an integral image. michael@0: */ michael@0: void michael@0: AlphaBoxBlur::BoxBlur_C(uint8_t* aData, michael@0: int32_t aLeftLobe, michael@0: int32_t aRightLobe, michael@0: int32_t aTopLobe, michael@0: int32_t aBottomLobe, michael@0: uint32_t *aIntegralImage, michael@0: size_t aIntegralImageStride) michael@0: { michael@0: IntSize size = GetSize(); michael@0: michael@0: MOZ_ASSERT(size.width > 0); michael@0: michael@0: // Our 'left' or 'top' lobe will include the current pixel. i.e. when michael@0: // looking at an integral image the value of a pixel at 'x,y' is calculated michael@0: // using the value of the integral image values above/below that. michael@0: aLeftLobe++; michael@0: aTopLobe++; michael@0: int32_t boxSize = (aLeftLobe + aRightLobe) * (aTopLobe + aBottomLobe); michael@0: michael@0: MOZ_ASSERT(boxSize > 0); michael@0: michael@0: if (boxSize == 1) { michael@0: return; michael@0: } michael@0: michael@0: int32_t stride32bit = aIntegralImageStride / 4; michael@0: michael@0: int32_t leftInflation = RoundUpToMultipleOf4(aLeftLobe).value(); michael@0: michael@0: GenerateIntegralImage_C(leftInflation, aRightLobe, aTopLobe, aBottomLobe, michael@0: aIntegralImage, aIntegralImageStride, aData, michael@0: mStride, size); michael@0: michael@0: uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize); michael@0: michael@0: uint32_t *innerIntegral = aIntegralImage + (aTopLobe * stride32bit) + leftInflation; michael@0: michael@0: // Storing these locally makes this about 30% faster! Presumably the compiler michael@0: // can't be sure we're not altering the member variables in this loop. michael@0: IntRect skipRect = mSkipRect; michael@0: uint8_t *data = aData; michael@0: int32_t stride = mStride; michael@0: for (int32_t y = 0; y < size.height; y++) { michael@0: bool inSkipRectY = y > skipRect.y && y < skipRect.YMost(); michael@0: michael@0: uint32_t *topLeftBase = innerIntegral + ((y - aTopLobe) * stride32bit - aLeftLobe); michael@0: uint32_t *topRightBase = innerIntegral + ((y - aTopLobe) * stride32bit + aRightLobe); michael@0: uint32_t *bottomRightBase = innerIntegral + ((y + aBottomLobe) * stride32bit + aRightLobe); michael@0: uint32_t *bottomLeftBase = innerIntegral + ((y + aBottomLobe) * stride32bit - aLeftLobe); michael@0: michael@0: for (int32_t x = 0; x < size.width; x++) { michael@0: if (inSkipRectY && x > skipRect.x && x < skipRect.XMost()) { michael@0: x = skipRect.XMost() - 1; michael@0: // Trigger early jump on coming loop iterations, this will be reset michael@0: // next line anyway. michael@0: inSkipRectY = false; michael@0: continue; michael@0: } michael@0: int32_t topLeft = topLeftBase[x]; michael@0: int32_t topRight = topRightBase[x]; michael@0: int32_t bottomRight = bottomRightBase[x]; michael@0: int32_t bottomLeft = bottomLeftBase[x]; michael@0: michael@0: uint32_t value = bottomRight - topRight - bottomLeft; michael@0: value += topLeft; michael@0: michael@0: data[stride * y + x] = (uint64_t(reciprocal) * value + (uint64_t(1) << 31)) >> 32; michael@0: } michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Compute the box blur size (which we're calling the blur radius) from michael@0: * the standard deviation. michael@0: * michael@0: * Much of this, the 3 * sqrt(2 * pi) / 4, is the known value for michael@0: * approximating a Gaussian using box blurs. This yields quite a good michael@0: * approximation for a Gaussian. Then we multiply this by 1.5 since our michael@0: * code wants the radius of the entire triple-box-blur kernel instead of michael@0: * the diameter of an individual box blur. For more details, see: michael@0: * http://www.w3.org/TR/SVG11/filters.html#feGaussianBlurElement michael@0: * https://bugzilla.mozilla.org/show_bug.cgi?id=590039#c19 michael@0: */ michael@0: static const Float GAUSSIAN_SCALE_FACTOR = Float((3 * sqrt(2 * M_PI) / 4) * 1.5); michael@0: michael@0: IntSize michael@0: AlphaBoxBlur::CalculateBlurRadius(const Point& aStd) michael@0: { michael@0: IntSize size(static_cast(floor(aStd.x * GAUSSIAN_SCALE_FACTOR + 0.5)), michael@0: static_cast(floor(aStd.y * GAUSSIAN_SCALE_FACTOR + 0.5))); michael@0: michael@0: return size; michael@0: } michael@0: michael@0: } michael@0: }