|
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
|
3 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 #include "Blur.h" |
|
8 |
|
9 #include <algorithm> |
|
10 #include <math.h> |
|
11 #include <string.h> |
|
12 |
|
13 #include "mozilla/CheckedInt.h" |
|
14 #include "mozilla/Constants.h" |
|
15 |
|
16 #include "2D.h" |
|
17 #include "DataSurfaceHelpers.h" |
|
18 #include "Tools.h" |
|
19 |
|
20 using namespace std; |
|
21 |
|
22 namespace mozilla { |
|
23 namespace gfx { |
|
24 |
|
25 /** |
|
26 * Box blur involves looking at one pixel, and setting its value to the average |
|
27 * of its neighbouring pixels. |
|
28 * @param aInput The input buffer. |
|
29 * @param aOutput The output buffer. |
|
30 * @param aLeftLobe The number of pixels to blend on the left. |
|
31 * @param aRightLobe The number of pixels to blend on the right. |
|
32 * @param aWidth The number of columns in the buffers. |
|
33 * @param aRows The number of rows in the buffers. |
|
34 * @param aSkipRect An area to skip blurring in. |
|
35 * XXX shouldn't we pass stride in separately here? |
|
36 */ |
|
37 static void |
|
38 BoxBlurHorizontal(unsigned char* aInput, |
|
39 unsigned char* aOutput, |
|
40 int32_t aLeftLobe, |
|
41 int32_t aRightLobe, |
|
42 int32_t aWidth, |
|
43 int32_t aRows, |
|
44 const IntRect& aSkipRect) |
|
45 { |
|
46 MOZ_ASSERT(aWidth > 0); |
|
47 |
|
48 int32_t boxSize = aLeftLobe + aRightLobe + 1; |
|
49 bool skipRectCoversWholeRow = 0 >= aSkipRect.x && |
|
50 aWidth <= aSkipRect.XMost(); |
|
51 if (boxSize == 1) { |
|
52 memcpy(aOutput, aInput, aWidth*aRows); |
|
53 return; |
|
54 } |
|
55 uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize); |
|
56 |
|
57 for (int32_t y = 0; y < aRows; y++) { |
|
58 // Check whether the skip rect intersects this row. If the skip |
|
59 // rect covers the whole surface in this row, we can avoid |
|
60 // this row entirely (and any others along the skip rect). |
|
61 bool inSkipRectY = y >= aSkipRect.y && |
|
62 y < aSkipRect.YMost(); |
|
63 if (inSkipRectY && skipRectCoversWholeRow) { |
|
64 y = aSkipRect.YMost() - 1; |
|
65 continue; |
|
66 } |
|
67 |
|
68 uint32_t alphaSum = 0; |
|
69 for (int32_t i = 0; i < boxSize; i++) { |
|
70 int32_t pos = i - aLeftLobe; |
|
71 // See assertion above; if aWidth is zero, then we would have no |
|
72 // valid position to clamp to. |
|
73 pos = max(pos, 0); |
|
74 pos = min(pos, aWidth - 1); |
|
75 alphaSum += aInput[aWidth * y + pos]; |
|
76 } |
|
77 for (int32_t x = 0; x < aWidth; x++) { |
|
78 // Check whether we are within the skip rect. If so, go |
|
79 // to the next point outside the skip rect. |
|
80 if (inSkipRectY && x >= aSkipRect.x && |
|
81 x < aSkipRect.XMost()) { |
|
82 x = aSkipRect.XMost(); |
|
83 if (x >= aWidth) |
|
84 break; |
|
85 |
|
86 // Recalculate the neighbouring alpha values for |
|
87 // our new point on the surface. |
|
88 alphaSum = 0; |
|
89 for (int32_t i = 0; i < boxSize; i++) { |
|
90 int32_t pos = x + i - aLeftLobe; |
|
91 // See assertion above; if aWidth is zero, then we would have no |
|
92 // valid position to clamp to. |
|
93 pos = max(pos, 0); |
|
94 pos = min(pos, aWidth - 1); |
|
95 alphaSum += aInput[aWidth * y + pos]; |
|
96 } |
|
97 } |
|
98 int32_t tmp = x - aLeftLobe; |
|
99 int32_t last = max(tmp, 0); |
|
100 int32_t next = min(tmp + boxSize, aWidth - 1); |
|
101 |
|
102 aOutput[aWidth * y + x] = (uint64_t(alphaSum) * reciprocal) >> 32; |
|
103 |
|
104 alphaSum += aInput[aWidth * y + next] - |
|
105 aInput[aWidth * y + last]; |
|
106 } |
|
107 } |
|
108 } |
|
109 |
|
110 /** |
|
111 * Identical to BoxBlurHorizontal, except it blurs top and bottom instead of |
|
112 * left and right. |
|
113 * XXX shouldn't we pass stride in separately here? |
|
114 */ |
|
115 static void |
|
116 BoxBlurVertical(unsigned char* aInput, |
|
117 unsigned char* aOutput, |
|
118 int32_t aTopLobe, |
|
119 int32_t aBottomLobe, |
|
120 int32_t aWidth, |
|
121 int32_t aRows, |
|
122 const IntRect& aSkipRect) |
|
123 { |
|
124 MOZ_ASSERT(aRows > 0); |
|
125 |
|
126 int32_t boxSize = aTopLobe + aBottomLobe + 1; |
|
127 bool skipRectCoversWholeColumn = 0 >= aSkipRect.y && |
|
128 aRows <= aSkipRect.YMost(); |
|
129 if (boxSize == 1) { |
|
130 memcpy(aOutput, aInput, aWidth*aRows); |
|
131 return; |
|
132 } |
|
133 uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize); |
|
134 |
|
135 for (int32_t x = 0; x < aWidth; x++) { |
|
136 bool inSkipRectX = x >= aSkipRect.x && |
|
137 x < aSkipRect.XMost(); |
|
138 if (inSkipRectX && skipRectCoversWholeColumn) { |
|
139 x = aSkipRect.XMost() - 1; |
|
140 continue; |
|
141 } |
|
142 |
|
143 uint32_t alphaSum = 0; |
|
144 for (int32_t i = 0; i < boxSize; i++) { |
|
145 int32_t pos = i - aTopLobe; |
|
146 // See assertion above; if aRows is zero, then we would have no |
|
147 // valid position to clamp to. |
|
148 pos = max(pos, 0); |
|
149 pos = min(pos, aRows - 1); |
|
150 alphaSum += aInput[aWidth * pos + x]; |
|
151 } |
|
152 for (int32_t y = 0; y < aRows; y++) { |
|
153 if (inSkipRectX && y >= aSkipRect.y && |
|
154 y < aSkipRect.YMost()) { |
|
155 y = aSkipRect.YMost(); |
|
156 if (y >= aRows) |
|
157 break; |
|
158 |
|
159 alphaSum = 0; |
|
160 for (int32_t i = 0; i < boxSize; i++) { |
|
161 int32_t pos = y + i - aTopLobe; |
|
162 // See assertion above; if aRows is zero, then we would have no |
|
163 // valid position to clamp to. |
|
164 pos = max(pos, 0); |
|
165 pos = min(pos, aRows - 1); |
|
166 alphaSum += aInput[aWidth * pos + x]; |
|
167 } |
|
168 } |
|
169 int32_t tmp = y - aTopLobe; |
|
170 int32_t last = max(tmp, 0); |
|
171 int32_t next = min(tmp + boxSize, aRows - 1); |
|
172 |
|
173 aOutput[aWidth * y + x] = (uint64_t(alphaSum) * reciprocal) >> 32; |
|
174 |
|
175 alphaSum += aInput[aWidth * next + x] - |
|
176 aInput[aWidth * last + x]; |
|
177 } |
|
178 } |
|
179 } |
|
180 |
|
/**
 * Splits a blur radius into the lobe sizes of three successive box blurs.
 * See http://www.w3.org/TR/SVG/filters.html#feGaussianBlur for some notes
 * about approximating the Gaussian blur with box-blurs; the comments below
 * are in the terminology of that page.
 * @param aRadius The total radius; the three lobe sizes add up to it.
 * @param aLobes Output. aLobes[i][0] and aLobes[i][1] are the two lobes of
 *               pass i (left/right or top/bottom, depending on the caller).
 */
static void ComputeLobes(int32_t aRadius, int32_t aLobes[3][2])
{
  const int32_t z = aRadius / 3;
  const int32_t remainder = aRadius % 3;

  // remainder == 0: aRadius = z*3; choose d = 2*z + 1, so every lobe is z.
  int32_t major = z;
  int32_t minor = z;
  int32_t last = z;

  if (remainder == 1) {
    // aRadius = z*3 + 1
    // This is a tricky case since there is no value of d which will
    // yield a radius of exactly aRadius. If d is odd, i.e. d=2*k + 1
    // for some integer k, then the radius will be 3*k. If d is even,
    // i.e. d=2*k, then the radius will be 3*k - 1.
    // So we have to choose values that don't match the standard
    // algorithm.
    major = z + 1;
  } else if (remainder == 2) {
    // aRadius = z*3 + 2; choose d = 2*z + 2
    major = z + 1;
    last = z + 1;
  } else if (remainder != 0) {
    // Mathematical impossibility!
    MOZ_ASSERT(false);
    major = minor = last = 0;
  }
  MOZ_ASSERT(major + minor + last == aRadius);

  // The first two passes mirror each other; the third one is symmetric.
  aLobes[0][0] = major;
  aLobes[0][1] = minor;
  aLobes[1][0] = minor;
  aLobes[1][1] = major;
  aLobes[2][0] = last;
  aLobes[2][1] = last;
}
|
225 |
|
226 static void |
|
227 SpreadHorizontal(unsigned char* aInput, |
|
228 unsigned char* aOutput, |
|
229 int32_t aRadius, |
|
230 int32_t aWidth, |
|
231 int32_t aRows, |
|
232 int32_t aStride, |
|
233 const IntRect& aSkipRect) |
|
234 { |
|
235 if (aRadius == 0) { |
|
236 memcpy(aOutput, aInput, aStride * aRows); |
|
237 return; |
|
238 } |
|
239 |
|
240 bool skipRectCoversWholeRow = 0 >= aSkipRect.x && |
|
241 aWidth <= aSkipRect.XMost(); |
|
242 for (int32_t y = 0; y < aRows; y++) { |
|
243 // Check whether the skip rect intersects this row. If the skip |
|
244 // rect covers the whole surface in this row, we can avoid |
|
245 // this row entirely (and any others along the skip rect). |
|
246 bool inSkipRectY = y >= aSkipRect.y && |
|
247 y < aSkipRect.YMost(); |
|
248 if (inSkipRectY && skipRectCoversWholeRow) { |
|
249 y = aSkipRect.YMost() - 1; |
|
250 continue; |
|
251 } |
|
252 |
|
253 for (int32_t x = 0; x < aWidth; x++) { |
|
254 // Check whether we are within the skip rect. If so, go |
|
255 // to the next point outside the skip rect. |
|
256 if (inSkipRectY && x >= aSkipRect.x && |
|
257 x < aSkipRect.XMost()) { |
|
258 x = aSkipRect.XMost(); |
|
259 if (x >= aWidth) |
|
260 break; |
|
261 } |
|
262 |
|
263 int32_t sMin = max(x - aRadius, 0); |
|
264 int32_t sMax = min(x + aRadius, aWidth - 1); |
|
265 int32_t v = 0; |
|
266 for (int32_t s = sMin; s <= sMax; ++s) { |
|
267 v = max<int32_t>(v, aInput[aStride * y + s]); |
|
268 } |
|
269 aOutput[aStride * y + x] = v; |
|
270 } |
|
271 } |
|
272 } |
|
273 |
|
274 static void |
|
275 SpreadVertical(unsigned char* aInput, |
|
276 unsigned char* aOutput, |
|
277 int32_t aRadius, |
|
278 int32_t aWidth, |
|
279 int32_t aRows, |
|
280 int32_t aStride, |
|
281 const IntRect& aSkipRect) |
|
282 { |
|
283 if (aRadius == 0) { |
|
284 memcpy(aOutput, aInput, aStride * aRows); |
|
285 return; |
|
286 } |
|
287 |
|
288 bool skipRectCoversWholeColumn = 0 >= aSkipRect.y && |
|
289 aRows <= aSkipRect.YMost(); |
|
290 for (int32_t x = 0; x < aWidth; x++) { |
|
291 bool inSkipRectX = x >= aSkipRect.x && |
|
292 x < aSkipRect.XMost(); |
|
293 if (inSkipRectX && skipRectCoversWholeColumn) { |
|
294 x = aSkipRect.XMost() - 1; |
|
295 continue; |
|
296 } |
|
297 |
|
298 for (int32_t y = 0; y < aRows; y++) { |
|
299 // Check whether we are within the skip rect. If so, go |
|
300 // to the next point outside the skip rect. |
|
301 if (inSkipRectX && y >= aSkipRect.y && |
|
302 y < aSkipRect.YMost()) { |
|
303 y = aSkipRect.YMost(); |
|
304 if (y >= aRows) |
|
305 break; |
|
306 } |
|
307 |
|
308 int32_t sMin = max(y - aRadius, 0); |
|
309 int32_t sMax = min(y + aRadius, aRows - 1); |
|
310 int32_t v = 0; |
|
311 for (int32_t s = sMin; s <= sMax; ++s) { |
|
312 v = max<int32_t>(v, aInput[aStride * s + x]); |
|
313 } |
|
314 aOutput[aStride * y + x] = v; |
|
315 } |
|
316 } |
|
317 } |
|
318 |
|
319 CheckedInt<int32_t> |
|
320 AlphaBoxBlur::RoundUpToMultipleOf4(int32_t aVal) |
|
321 { |
|
322 CheckedInt<int32_t> val(aVal); |
|
323 |
|
324 val += 3; |
|
325 val /= 4; |
|
326 val *= 4; |
|
327 |
|
328 return val; |
|
329 } |
|
330 |
|
331 AlphaBoxBlur::AlphaBoxBlur(const Rect& aRect, |
|
332 const IntSize& aSpreadRadius, |
|
333 const IntSize& aBlurRadius, |
|
334 const Rect* aDirtyRect, |
|
335 const Rect* aSkipRect) |
|
336 : mSpreadRadius(aSpreadRadius), |
|
337 mBlurRadius(aBlurRadius), |
|
338 mSurfaceAllocationSize(0) |
|
339 { |
|
340 Rect rect(aRect); |
|
341 rect.Inflate(Size(aBlurRadius + aSpreadRadius)); |
|
342 rect.RoundOut(); |
|
343 |
|
344 if (aDirtyRect) { |
|
345 // If we get passed a dirty rect from layout, we can minimize the |
|
346 // shadow size and make painting faster. |
|
347 mHasDirtyRect = true; |
|
348 mDirtyRect = *aDirtyRect; |
|
349 Rect requiredBlurArea = mDirtyRect.Intersect(rect); |
|
350 requiredBlurArea.Inflate(Size(aBlurRadius + aSpreadRadius)); |
|
351 rect = requiredBlurArea.Intersect(rect); |
|
352 } else { |
|
353 mHasDirtyRect = false; |
|
354 } |
|
355 |
|
356 mRect = IntRect(int32_t(rect.x), int32_t(rect.y), |
|
357 int32_t(rect.width), int32_t(rect.height)); |
|
358 if (mRect.IsEmpty()) { |
|
359 return; |
|
360 } |
|
361 |
|
362 if (aSkipRect) { |
|
363 // If we get passed a skip rect, we can lower the amount of |
|
364 // blurring/spreading we need to do. We convert it to IntRect to avoid |
|
365 // expensive int<->float conversions if we were to use Rect instead. |
|
366 Rect skipRect = *aSkipRect; |
|
367 skipRect.RoundIn(); |
|
368 skipRect.Deflate(Size(aBlurRadius + aSpreadRadius)); |
|
369 mSkipRect = IntRect(int32_t(skipRect.x), int32_t(skipRect.y), |
|
370 int32_t(skipRect.width), int32_t(skipRect.height)); |
|
371 |
|
372 mSkipRect = mSkipRect.Intersect(mRect); |
|
373 if (mSkipRect.IsEqualInterior(mRect)) |
|
374 return; |
|
375 |
|
376 mSkipRect -= mRect.TopLeft(); |
|
377 } else { |
|
378 mSkipRect = IntRect(0, 0, 0, 0); |
|
379 } |
|
380 |
|
381 CheckedInt<int32_t> stride = RoundUpToMultipleOf4(mRect.width); |
|
382 if (stride.isValid()) { |
|
383 mStride = stride.value(); |
|
384 |
|
385 // We need to leave room for an additional 3 bytes for a potential overrun |
|
386 // in our blurring code. |
|
387 size_t size = BufferSizeFromStrideAndHeight(mStride, mRect.height, 3); |
|
388 if (size != 0) { |
|
389 mSurfaceAllocationSize = size; |
|
390 } |
|
391 } |
|
392 } |
|
393 |
|
394 AlphaBoxBlur::AlphaBoxBlur(const Rect& aRect, |
|
395 int32_t aStride, |
|
396 float aSigmaX, |
|
397 float aSigmaY) |
|
398 : mRect(int32_t(aRect.x), int32_t(aRect.y), |
|
399 int32_t(aRect.width), int32_t(aRect.height)), |
|
400 mSpreadRadius(), |
|
401 mBlurRadius(CalculateBlurRadius(Point(aSigmaX, aSigmaY))), |
|
402 mStride(aStride), |
|
403 mSurfaceAllocationSize(0) |
|
404 { |
|
405 IntRect intRect; |
|
406 if (aRect.ToIntRect(&intRect)) { |
|
407 size_t minDataSize = BufferSizeFromStrideAndHeight(intRect.width, intRect.height); |
|
408 if (minDataSize != 0) { |
|
409 mSurfaceAllocationSize = minDataSize; |
|
410 } |
|
411 } |
|
412 } |
|
413 |
|
414 |
|
415 AlphaBoxBlur::~AlphaBoxBlur() |
|
416 { |
|
417 } |
|
418 |
|
419 IntSize |
|
420 AlphaBoxBlur::GetSize() |
|
421 { |
|
422 IntSize size(mRect.width, mRect.height); |
|
423 return size; |
|
424 } |
|
425 |
|
426 int32_t |
|
427 AlphaBoxBlur::GetStride() |
|
428 { |
|
429 return mStride; |
|
430 } |
|
431 |
|
432 IntRect |
|
433 AlphaBoxBlur::GetRect() |
|
434 { |
|
435 return mRect; |
|
436 } |
|
437 |
|
438 Rect* |
|
439 AlphaBoxBlur::GetDirtyRect() |
|
440 { |
|
441 if (mHasDirtyRect) { |
|
442 return &mDirtyRect; |
|
443 } |
|
444 |
|
445 return nullptr; |
|
446 } |
|
447 |
|
448 size_t |
|
449 AlphaBoxBlur::GetSurfaceAllocationSize() const |
|
450 { |
|
451 return mSurfaceAllocationSize; |
|
452 } |
|
453 |
|
454 void |
|
455 AlphaBoxBlur::Blur(uint8_t* aData) |
|
456 { |
|
457 if (!aData) { |
|
458 return; |
|
459 } |
|
460 |
|
461 // no need to do all this if not blurring or spreading |
|
462 if (mBlurRadius != IntSize(0,0) || mSpreadRadius != IntSize(0,0)) { |
|
463 int32_t stride = GetStride(); |
|
464 |
|
465 IntSize size = GetSize(); |
|
466 |
|
467 if (mSpreadRadius.width > 0 || mSpreadRadius.height > 0) { |
|
468 // No need to use CheckedInt here - we have validated it in the constructor. |
|
469 size_t szB = stride * size.height; |
|
470 unsigned char* tmpData = new (std::nothrow) uint8_t[szB]; |
|
471 |
|
472 if (!tmpData) { |
|
473 return; |
|
474 } |
|
475 |
|
476 memset(tmpData, 0, szB); |
|
477 |
|
478 SpreadHorizontal(aData, tmpData, mSpreadRadius.width, GetSize().width, GetSize().height, stride, mSkipRect); |
|
479 SpreadVertical(tmpData, aData, mSpreadRadius.height, GetSize().width, GetSize().height, stride, mSkipRect); |
|
480 |
|
481 delete [] tmpData; |
|
482 } |
|
483 |
|
484 int32_t horizontalLobes[3][2]; |
|
485 ComputeLobes(mBlurRadius.width, horizontalLobes); |
|
486 int32_t verticalLobes[3][2]; |
|
487 ComputeLobes(mBlurRadius.height, verticalLobes); |
|
488 |
|
489 // We want to allow for some extra space on the left for alignment reasons. |
|
490 int32_t maxLeftLobe = RoundUpToMultipleOf4(horizontalLobes[0][0] + 1).value(); |
|
491 |
|
492 IntSize integralImageSize(size.width + maxLeftLobe + horizontalLobes[1][1], |
|
493 size.height + verticalLobes[0][0] + verticalLobes[1][1] + 1); |
|
494 |
|
495 if ((integralImageSize.width * integralImageSize.height) > (1 << 24)) { |
|
496 // Fallback to old blurring code when the surface is so large it may |
|
497 // overflow our integral image! |
|
498 |
|
499 // No need to use CheckedInt here - we have validated it in the constructor. |
|
500 size_t szB = stride * size.height; |
|
501 uint8_t* tmpData = new (std::nothrow) uint8_t[szB]; |
|
502 if (!tmpData) { |
|
503 return; |
|
504 } |
|
505 |
|
506 memset(tmpData, 0, szB); |
|
507 |
|
508 uint8_t* a = aData; |
|
509 uint8_t* b = tmpData; |
|
510 if (mBlurRadius.width > 0) { |
|
511 BoxBlurHorizontal(a, b, horizontalLobes[0][0], horizontalLobes[0][1], stride, GetSize().height, mSkipRect); |
|
512 BoxBlurHorizontal(b, a, horizontalLobes[1][0], horizontalLobes[1][1], stride, GetSize().height, mSkipRect); |
|
513 BoxBlurHorizontal(a, b, horizontalLobes[2][0], horizontalLobes[2][1], stride, GetSize().height, mSkipRect); |
|
514 } else { |
|
515 a = tmpData; |
|
516 b = aData; |
|
517 } |
|
518 // The result is in 'b' here. |
|
519 if (mBlurRadius.height > 0) { |
|
520 BoxBlurVertical(b, a, verticalLobes[0][0], verticalLobes[0][1], stride, GetSize().height, mSkipRect); |
|
521 BoxBlurVertical(a, b, verticalLobes[1][0], verticalLobes[1][1], stride, GetSize().height, mSkipRect); |
|
522 BoxBlurVertical(b, a, verticalLobes[2][0], verticalLobes[2][1], stride, GetSize().height, mSkipRect); |
|
523 } else { |
|
524 a = b; |
|
525 } |
|
526 // The result is in 'a' here. |
|
527 if (a == tmpData) { |
|
528 memcpy(aData, tmpData, szB); |
|
529 } |
|
530 delete [] tmpData; |
|
531 } else { |
|
532 size_t integralImageStride = GetAlignedStride<16>(integralImageSize.width * 4); |
|
533 |
|
534 // We need to leave room for an additional 12 bytes for a maximum overrun |
|
535 // of 3 pixels in the blurring code. |
|
536 size_t bufLen = BufferSizeFromStrideAndHeight(integralImageStride, integralImageSize.height, 12); |
|
537 if (bufLen == 0) { |
|
538 return; |
|
539 } |
|
540 // bufLen is a byte count, but here we want a multiple of 32-bit ints, so |
|
541 // we divide by 4. |
|
542 AlignedArray<uint32_t> integralImage((bufLen / 4) + ((bufLen % 4) ? 1 : 0)); |
|
543 |
|
544 if (!integralImage) { |
|
545 return; |
|
546 } |
|
547 #ifdef USE_SSE2 |
|
548 if (Factory::HasSSE2()) { |
|
549 BoxBlur_SSE2(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0], |
|
550 verticalLobes[0][1], integralImage, integralImageStride); |
|
551 BoxBlur_SSE2(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0], |
|
552 verticalLobes[1][1], integralImage, integralImageStride); |
|
553 BoxBlur_SSE2(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0], |
|
554 verticalLobes[2][1], integralImage, integralImageStride); |
|
555 } else |
|
556 #endif |
|
557 { |
|
558 BoxBlur_C(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0], |
|
559 verticalLobes[0][1], integralImage, integralImageStride); |
|
560 BoxBlur_C(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0], |
|
561 verticalLobes[1][1], integralImage, integralImageStride); |
|
562 BoxBlur_C(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0], |
|
563 verticalLobes[2][1], integralImage, integralImageStride); |
|
564 } |
|
565 } |
|
566 } |
|
567 } |
|
568 |
|
/**
 * Computes one row of an integral image: every entry written to aDest is the
 * running sum of the source values of this row up to that position, plus the
 * entry at the same position in aPreviousRow.
 *
 * The row consists of aLeftInflation copies of the first source pixel, the
 * source pixels themselves, and aRightInflation copies of the last source
 * pixel. The source pixels are consumed in groups of four, so when
 * aSourceWidth is not a multiple of four up to 3 bytes past the end of the
 * source row are read and the same number of additional entries is written
 * before the right inflation starts.
 *
 * aDest and aPreviousRow may point to the same row: each entry of the
 * previous row is read before the entry at that position is written.
 *
 * @param aDest Receives the entries of this row.
 * @param aSource The source pixels of this row, one byte each.
 * @param aPreviousRow The entries of the row above.
 * @param aSourceWidth The number of pixels in the source row.
 * @param aLeftInflation The number of entries to add on the left.
 * @param aRightInflation The number of entries to add on the right.
 */
MOZ_ALWAYS_INLINE void
GenerateIntegralRow(uint32_t *aDest, const uint8_t *aSource, uint32_t *aPreviousRow,
                    const uint32_t &aSourceWidth, const uint32_t &aLeftInflation, const uint32_t &aRightInflation)
{
  uint32_t currentRowSum = 0;

  // Left inflation: repeat the first pixel of the row.
  uint32_t pixel = aSource[0];
  for (uint32_t x = 0; x < aLeftInflation; x++) {
    currentRowSum += pixel;
    *aDest++ = currentRowSum + *aPreviousRow++;
  }

  for (uint32_t x = aLeftInflation; x < (aSourceWidth + aLeftInflation); x += 4) {
    // Fetch four pixels at once. Copying them out with memcpy, instead of
    // reading the bytes through a uint32_t pointer, is valid for any
    // alignment of aSource, and consuming them in memory order gives the same
    // result on little- and big-endian machines.
    uint8_t alphaValues[4];
    memcpy(alphaValues, aSource + (x - aLeftInflation), sizeof(alphaValues));
    for (uint32_t i = 0; i < 4; i++) {
      currentRowSum += alphaValues[i];
      *aDest++ = *aPreviousRow++ + currentRowSum;
    }
  }

  // Right inflation: repeat the last pixel of the row.
  pixel = aSource[aSourceWidth - 1];
  for (uint32_t x = (aSourceWidth + aLeftInflation); x < (aSourceWidth + aLeftInflation + aRightInflation); x++) {
    currentRowSum += pixel;
    *aDest++ = currentRowSum + *aPreviousRow++;
  }
}
|
610 |
|
611 MOZ_ALWAYS_INLINE void |
|
612 GenerateIntegralImage_C(int32_t aLeftInflation, int32_t aRightInflation, |
|
613 int32_t aTopInflation, int32_t aBottomInflation, |
|
614 uint32_t *aIntegralImage, size_t aIntegralImageStride, |
|
615 uint8_t *aSource, int32_t aSourceStride, const IntSize &aSize) |
|
616 { |
|
617 uint32_t stride32bit = aIntegralImageStride / 4; |
|
618 |
|
619 IntSize integralImageSize(aSize.width + aLeftInflation + aRightInflation, |
|
620 aSize.height + aTopInflation + aBottomInflation); |
|
621 |
|
622 memset(aIntegralImage, 0, aIntegralImageStride); |
|
623 |
|
624 GenerateIntegralRow(aIntegralImage, aSource, aIntegralImage, |
|
625 aSize.width, aLeftInflation, aRightInflation); |
|
626 for (int y = 1; y < aTopInflation + 1; y++) { |
|
627 GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource, aIntegralImage + (y - 1) * stride32bit, |
|
628 aSize.width, aLeftInflation, aRightInflation); |
|
629 } |
|
630 |
|
631 for (int y = aTopInflation + 1; y < (aSize.height + aTopInflation); y++) { |
|
632 GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource + aSourceStride * (y - aTopInflation), |
|
633 aIntegralImage + (y - 1) * stride32bit, aSize.width, aLeftInflation, aRightInflation); |
|
634 } |
|
635 |
|
636 if (aBottomInflation) { |
|
637 for (int y = (aSize.height + aTopInflation); y < integralImageSize.height; y++) { |
|
638 GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource + ((aSize.height - 1) * aSourceStride), |
|
639 aIntegralImage + (y - 1) * stride32bit, |
|
640 aSize.width, aLeftInflation, aRightInflation); |
|
641 } |
|
642 } |
|
643 } |
|
644 |
|
645 /** |
|
646 * Attempt to do an in-place box blur using an integral image. |
|
647 */ |
|
648 void |
|
649 AlphaBoxBlur::BoxBlur_C(uint8_t* aData, |
|
650 int32_t aLeftLobe, |
|
651 int32_t aRightLobe, |
|
652 int32_t aTopLobe, |
|
653 int32_t aBottomLobe, |
|
654 uint32_t *aIntegralImage, |
|
655 size_t aIntegralImageStride) |
|
656 { |
|
657 IntSize size = GetSize(); |
|
658 |
|
659 MOZ_ASSERT(size.width > 0); |
|
660 |
|
661 // Our 'left' or 'top' lobe will include the current pixel. i.e. when |
|
662 // looking at an integral image the value of a pixel at 'x,y' is calculated |
|
663 // using the value of the integral image values above/below that. |
|
664 aLeftLobe++; |
|
665 aTopLobe++; |
|
666 int32_t boxSize = (aLeftLobe + aRightLobe) * (aTopLobe + aBottomLobe); |
|
667 |
|
668 MOZ_ASSERT(boxSize > 0); |
|
669 |
|
670 if (boxSize == 1) { |
|
671 return; |
|
672 } |
|
673 |
|
674 int32_t stride32bit = aIntegralImageStride / 4; |
|
675 |
|
676 int32_t leftInflation = RoundUpToMultipleOf4(aLeftLobe).value(); |
|
677 |
|
678 GenerateIntegralImage_C(leftInflation, aRightLobe, aTopLobe, aBottomLobe, |
|
679 aIntegralImage, aIntegralImageStride, aData, |
|
680 mStride, size); |
|
681 |
|
682 uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize); |
|
683 |
|
684 uint32_t *innerIntegral = aIntegralImage + (aTopLobe * stride32bit) + leftInflation; |
|
685 |
|
686 // Storing these locally makes this about 30% faster! Presumably the compiler |
|
687 // can't be sure we're not altering the member variables in this loop. |
|
688 IntRect skipRect = mSkipRect; |
|
689 uint8_t *data = aData; |
|
690 int32_t stride = mStride; |
|
691 for (int32_t y = 0; y < size.height; y++) { |
|
692 bool inSkipRectY = y > skipRect.y && y < skipRect.YMost(); |
|
693 |
|
694 uint32_t *topLeftBase = innerIntegral + ((y - aTopLobe) * stride32bit - aLeftLobe); |
|
695 uint32_t *topRightBase = innerIntegral + ((y - aTopLobe) * stride32bit + aRightLobe); |
|
696 uint32_t *bottomRightBase = innerIntegral + ((y + aBottomLobe) * stride32bit + aRightLobe); |
|
697 uint32_t *bottomLeftBase = innerIntegral + ((y + aBottomLobe) * stride32bit - aLeftLobe); |
|
698 |
|
699 for (int32_t x = 0; x < size.width; x++) { |
|
700 if (inSkipRectY && x > skipRect.x && x < skipRect.XMost()) { |
|
701 x = skipRect.XMost() - 1; |
|
702 // Trigger early jump on coming loop iterations, this will be reset |
|
703 // next line anyway. |
|
704 inSkipRectY = false; |
|
705 continue; |
|
706 } |
|
707 int32_t topLeft = topLeftBase[x]; |
|
708 int32_t topRight = topRightBase[x]; |
|
709 int32_t bottomRight = bottomRightBase[x]; |
|
710 int32_t bottomLeft = bottomLeftBase[x]; |
|
711 |
|
712 uint32_t value = bottomRight - topRight - bottomLeft; |
|
713 value += topLeft; |
|
714 |
|
715 data[stride * y + x] = (uint64_t(reciprocal) * value + (uint64_t(1) << 31)) >> 32; |
|
716 } |
|
717 } |
|
718 } |
|
719 |
|
720 /** |
|
721 * Compute the box blur size (which we're calling the blur radius) from |
|
722 * the standard deviation. |
|
723 * |
|
724 * Much of this, the 3 * sqrt(2 * pi) / 4, is the known value for |
|
725 * approximating a Gaussian using box blurs. This yields quite a good |
|
726 * approximation for a Gaussian. Then we multiply this by 1.5 since our |
|
727 * code wants the radius of the entire triple-box-blur kernel instead of |
|
728 * the diameter of an individual box blur. For more details, see: |
|
729 * http://www.w3.org/TR/SVG11/filters.html#feGaussianBlurElement |
|
730 * https://bugzilla.mozilla.org/show_bug.cgi?id=590039#c19 |
|
731 */ |
|
732 static const Float GAUSSIAN_SCALE_FACTOR = Float((3 * sqrt(2 * M_PI) / 4) * 1.5); |
|
733 |
|
734 IntSize |
|
735 AlphaBoxBlur::CalculateBlurRadius(const Point& aStd) |
|
736 { |
|
737 IntSize size(static_cast<int32_t>(floor(aStd.x * GAUSSIAN_SCALE_FACTOR + 0.5)), |
|
738 static_cast<int32_t>(floor(aStd.y * GAUSSIAN_SCALE_FACTOR + 0.5))); |
|
739 |
|
740 return size; |
|
741 } |
|
742 |
|
743 } |
|
744 } |