1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/gfx/skia/trunk/src/effects/SkBlurMask.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,992 @@ 1.4 + 1.5 +/* 1.6 + * Copyright 2006 The Android Open Source Project 1.7 + * 1.8 + * Use of this source code is governed by a BSD-style license that can be 1.9 + * found in the LICENSE file. 1.10 + */ 1.11 + 1.12 + 1.13 +#include "SkBlurMask.h" 1.14 +#include "SkMath.h" 1.15 +#include "SkTemplates.h" 1.16 +#include "SkEndian.h" 1.17 + 1.18 + 1.19 +SkScalar SkBlurMask::ConvertRadiusToSigma(SkScalar radius) { 1.20 + // This constant approximates the scaling done in the software path's 1.21 + // "high quality" mode, in SkBlurMask::Blur() (1 / sqrt(3)). 1.22 + // IMHO, it actually should be 1: we blur "less" than we should do 1.23 + // according to the CSS and canvas specs, simply because Safari does the same. 1.24 + // Firefox used to do the same too, until 4.0 where they fixed it. So at some 1.25 + // point we should probably get rid of these scaling constants and rebaseline 1.26 + // all the blur tests. 1.27 + static const SkScalar kBLUR_SIGMA_SCALE = 0.57735f; 1.28 + 1.29 + return radius ? kBLUR_SIGMA_SCALE * radius + 0.5f : 0.0f; 1.30 +} 1.31 + 1.32 +#define UNROLL_SEPARABLE_LOOPS 1.33 + 1.34 +/** 1.35 + * This function performs a box blur in X, of the given radius. If the 1.36 + * "transpose" parameter is true, it will transpose the pixels on write, 1.37 + * such that X and Y are swapped. Reads are always performed from contiguous 1.38 + * memory in X, for speed. The destination buffer (dst) must be at least 1.39 + * (width + leftRadius + rightRadius) * height bytes in size. 1.40 + * 1.41 + * This is what the inner loop looks like before unrolling, and with the two 1.42 + * cases broken out separately (width < diameter, width >= diameter): 1.43 + * 1.44 + * if (width < diameter) { 1.45 + * for (int x = 0; x < width; ++x) { 1.46 + * sum += *right++; 1.47 + * *dptr = (sum * scale + half) >> 24; 1.48 + * dptr += dst_x_stride; 1.49 + * } 1.50 + * for (int x = width; x < diameter; ++x) { 1.51 + * *dptr = (sum * scale + half) >> 24; 1.52 + * dptr += dst_x_stride; 1.53 + * } 1.54 + * for (int x = 0; x < width; ++x) { 1.55 + * *dptr = (sum * scale + half) >> 24; 1.56 + * sum -= *left++; 1.57 + * dptr += dst_x_stride; 1.58 + * } 1.59 + * } else { 1.60 + * for (int x = 0; x < diameter; ++x) { 1.61 + * sum += *right++; 1.62 + * *dptr = (sum * scale + half) >> 24; 1.63 + * dptr += dst_x_stride; 1.64 + * } 1.65 + * for (int x = diameter; x < width; ++x) { 1.66 + * sum += *right++; 1.67 + * *dptr = (sum * scale + half) >> 24; 1.68 + * sum -= *left++; 1.69 + * dptr += dst_x_stride; 1.70 + * } 1.71 + * for (int x = 0; x < diameter; ++x) { 1.72 + * *dptr = (sum * scale + half) >> 24; 1.73 + * sum -= *left++; 1.74 + * dptr += dst_x_stride; 1.75 + * } 1.76 + * } 1.77 + */ 1.78 +static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst, 1.79 + int leftRadius, int rightRadius, int width, int height, 1.80 + bool transpose) 1.81 +{ 1.82 + int diameter = leftRadius + rightRadius; 1.83 + int kernelSize = diameter + 1; 1.84 + int border = SkMin32(width, diameter); 1.85 + uint32_t scale = (1 << 24) / kernelSize; 1.86 + int new_width = width + SkMax32(leftRadius, rightRadius) * 2; 1.87 + int dst_x_stride = transpose ? height : 1; 1.88 + int dst_y_stride = transpose ? 1 : new_width; 1.89 + uint32_t half = 1 << 23; 1.90 + for (int y = 0; y < height; ++y) { 1.91 + uint32_t sum = 0; 1.92 + uint8_t* dptr = dst + y * dst_y_stride; 1.93 + const uint8_t* right = src + y * src_y_stride; 1.94 + const uint8_t* left = right; 1.95 + for (int x = 0; x < rightRadius - leftRadius; x++) { 1.96 + *dptr = 0; 1.97 + dptr += dst_x_stride; 1.98 + } 1.99 +#define LEFT_BORDER_ITER \ 1.100 + sum += *right++; \ 1.101 + *dptr = (sum * scale + half) >> 24; \ 1.102 + dptr += dst_x_stride; 1.103 + 1.104 + int x = 0; 1.105 +#ifdef UNROLL_SEPARABLE_LOOPS 1.106 + for (; x < border - 16; x += 16) { 1.107 + LEFT_BORDER_ITER 1.108 + LEFT_BORDER_ITER 1.109 + LEFT_BORDER_ITER 1.110 + LEFT_BORDER_ITER 1.111 + LEFT_BORDER_ITER 1.112 + LEFT_BORDER_ITER 1.113 + LEFT_BORDER_ITER 1.114 + LEFT_BORDER_ITER 1.115 + LEFT_BORDER_ITER 1.116 + LEFT_BORDER_ITER 1.117 + LEFT_BORDER_ITER 1.118 + LEFT_BORDER_ITER 1.119 + LEFT_BORDER_ITER 1.120 + LEFT_BORDER_ITER 1.121 + LEFT_BORDER_ITER 1.122 + LEFT_BORDER_ITER 1.123 + } 1.124 +#endif 1.125 + for (; x < border; ++x) { 1.126 + LEFT_BORDER_ITER 1.127 + } 1.128 +#undef LEFT_BORDER_ITER 1.129 +#define TRIVIAL_ITER \ 1.130 + *dptr = (sum * scale + half) >> 24; \ 1.131 + dptr += dst_x_stride; 1.132 + x = width; 1.133 +#ifdef UNROLL_SEPARABLE_LOOPS 1.134 + for (; x < diameter - 16; x += 16) { 1.135 + TRIVIAL_ITER 1.136 + TRIVIAL_ITER 1.137 + TRIVIAL_ITER 1.138 + TRIVIAL_ITER 1.139 + TRIVIAL_ITER 1.140 + TRIVIAL_ITER 1.141 + TRIVIAL_ITER 1.142 + TRIVIAL_ITER 1.143 + TRIVIAL_ITER 1.144 + TRIVIAL_ITER 1.145 + TRIVIAL_ITER 1.146 + TRIVIAL_ITER 1.147 + TRIVIAL_ITER 1.148 + TRIVIAL_ITER 1.149 + TRIVIAL_ITER 1.150 + TRIVIAL_ITER 1.151 + } 1.152 +#endif 1.153 + for (; x < diameter; ++x) { 1.154 + TRIVIAL_ITER 1.155 + } 1.156 +#undef TRIVIAL_ITER 1.157 +#define CENTER_ITER \ 1.158 + sum += *right++; \ 1.159 + *dptr = (sum * scale + half) >> 24; \ 1.160 + sum -= *left++; \ 1.161 + dptr += dst_x_stride; 1.162 + 1.163 + x = diameter; 1.164 +#ifdef UNROLL_SEPARABLE_LOOPS 1.165 + for (; x < width - 16; x += 16) { 1.166 + CENTER_ITER 1.167 + CENTER_ITER 1.168 + CENTER_ITER 1.169 + CENTER_ITER 1.170 + CENTER_ITER 1.171 + CENTER_ITER 1.172 + CENTER_ITER 1.173 + CENTER_ITER 1.174 + CENTER_ITER 1.175 + CENTER_ITER 1.176 + CENTER_ITER 1.177 + CENTER_ITER 1.178 + CENTER_ITER 1.179 + CENTER_ITER 1.180 + CENTER_ITER 1.181 + CENTER_ITER 1.182 + } 1.183 +#endif 1.184 + for (; x < width; ++x) { 1.185 + CENTER_ITER 1.186 + } 1.187 +#undef CENTER_ITER 1.188 +#define RIGHT_BORDER_ITER \ 1.189 + *dptr = (sum * scale + half) >> 24; \ 1.190 + sum -= *left++; \ 1.191 + dptr += dst_x_stride; 1.192 + 1.193 + x = 0; 1.194 +#ifdef UNROLL_SEPARABLE_LOOPS 1.195 + for (; x < border - 16; x += 16) { 1.196 + RIGHT_BORDER_ITER 1.197 + RIGHT_BORDER_ITER 1.198 + RIGHT_BORDER_ITER 1.199 + RIGHT_BORDER_ITER 1.200 + RIGHT_BORDER_ITER 1.201 + RIGHT_BORDER_ITER 1.202 + RIGHT_BORDER_ITER 1.203 + RIGHT_BORDER_ITER 1.204 + RIGHT_BORDER_ITER 1.205 + RIGHT_BORDER_ITER 1.206 + RIGHT_BORDER_ITER 1.207 + RIGHT_BORDER_ITER 1.208 + RIGHT_BORDER_ITER 1.209 + RIGHT_BORDER_ITER 1.210 + RIGHT_BORDER_ITER 1.211 + RIGHT_BORDER_ITER 1.212 + } 1.213 +#endif 1.214 + for (; x < border; ++x) { 1.215 + RIGHT_BORDER_ITER 1.216 + } 1.217 +#undef RIGHT_BORDER_ITER 1.218 + for (int x = 0; x < leftRadius - rightRadius; ++x) { 1.219 + *dptr = 0; 1.220 + dptr += dst_x_stride; 1.221 + } 1.222 + SkASSERT(sum == 0); 1.223 + } 1.224 + return new_width; 1.225 +} 1.226 + 1.227 +/** 1.228 + * This variant of the box blur handles blurring of non-integer radii. It 1.229 + * keeps two running sums: an outer sum for the rounded-up kernel radius, and 1.230 + * an inner sum for the rounded-down kernel radius. For each pixel, it linearly 1.231 + * interpolates between them. In float this would be: 1.232 + * outer_weight * outer_sum / kernelSize + 1.233 + * (1.0 - outer_weight) * innerSum / (kernelSize - 2) 1.234 + * 1.235 + * This is what the inner loop looks like before unrolling, and with the two 1.236 + * cases broken out separately (width < diameter, width >= diameter): 1.237 + * 1.238 + * if (width < diameter) { 1.239 + * for (int x = 0; x < width; x++) { 1.240 + * inner_sum = outer_sum; 1.241 + * outer_sum += *right++; 1.242 + * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 1.243 + * dptr += dst_x_stride; 1.244 + * } 1.245 + * for (int x = width; x < diameter; ++x) { 1.246 + * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 1.247 + * dptr += dst_x_stride; 1.248 + * } 1.249 + * for (int x = 0; x < width; x++) { 1.250 + * inner_sum = outer_sum - *left++; 1.251 + * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 1.252 + * dptr += dst_x_stride; 1.253 + * outer_sum = inner_sum; 1.254 + * } 1.255 + * } else { 1.256 + * for (int x = 0; x < diameter; x++) { 1.257 + * inner_sum = outer_sum; 1.258 + * outer_sum += *right++; 1.259 + * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 1.260 + * dptr += dst_x_stride; 1.261 + * } 1.262 + * for (int x = diameter; x < width; ++x) { 1.263 + * inner_sum = outer_sum - *left; 1.264 + * outer_sum += *right++; 1.265 + * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 1.266 + * dptr += dst_x_stride; 1.267 + * outer_sum -= *left++; 1.268 + * } 1.269 + * for (int x = 0; x < diameter; x++) { 1.270 + * inner_sum = outer_sum - *left++; 1.271 + * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 1.272 + * dptr += dst_x_stride; 1.273 + * outer_sum = inner_sum; 1.274 + * } 1.275 + * } 1.276 + * } 1.277 + * return new_width; 1.278 + */ 1.279 + 1.280 +static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst, 1.281 + int radius, int width, int height, 1.282 + bool transpose, uint8_t outer_weight) 1.283 +{ 1.284 + int diameter = radius * 2; 1.285 + int kernelSize = diameter + 1; 1.286 + int border = SkMin32(width, diameter); 1.287 + int inner_weight = 255 - outer_weight; 1.288 + outer_weight += outer_weight >> 7; 1.289 + inner_weight += inner_weight >> 7; 1.290 + uint32_t outer_scale = (outer_weight << 16) / kernelSize; 1.291 + uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2); 1.292 + uint32_t half = 1 << 23; 1.293 + int new_width = width + diameter; 1.294 + int dst_x_stride = transpose ? height : 1; 1.295 + int dst_y_stride = transpose ? 1 : new_width; 1.296 + for (int y = 0; y < height; ++y) { 1.297 + uint32_t outer_sum = 0, inner_sum = 0; 1.298 + uint8_t* dptr = dst + y * dst_y_stride; 1.299 + const uint8_t* right = src + y * src_y_stride; 1.300 + const uint8_t* left = right; 1.301 + int x = 0; 1.302 + 1.303 +#define LEFT_BORDER_ITER \ 1.304 + inner_sum = outer_sum; \ 1.305 + outer_sum += *right++; \ 1.306 + *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \ 1.307 + dptr += dst_x_stride; 1.308 + 1.309 +#ifdef UNROLL_SEPARABLE_LOOPS 1.310 + for (;x < border - 16; x += 16) { 1.311 + LEFT_BORDER_ITER 1.312 + LEFT_BORDER_ITER 1.313 + LEFT_BORDER_ITER 1.314 + LEFT_BORDER_ITER 1.315 + LEFT_BORDER_ITER 1.316 + LEFT_BORDER_ITER 1.317 + LEFT_BORDER_ITER 1.318 + LEFT_BORDER_ITER 1.319 + LEFT_BORDER_ITER 1.320 + LEFT_BORDER_ITER 1.321 + LEFT_BORDER_ITER 1.322 + LEFT_BORDER_ITER 1.323 + LEFT_BORDER_ITER 1.324 + LEFT_BORDER_ITER 1.325 + LEFT_BORDER_ITER 1.326 + LEFT_BORDER_ITER 1.327 + } 1.328 +#endif 1.329 + 1.330 + for (;x < border; ++x) { 1.331 + LEFT_BORDER_ITER 1.332 + } 1.333 +#undef LEFT_BORDER_ITER 1.334 + for (int x = width; x < diameter; ++x) { 1.335 + *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 1.336 + dptr += dst_x_stride; 1.337 + } 1.338 + x = diameter; 1.339 + 1.340 +#define CENTER_ITER \ 1.341 + inner_sum = outer_sum - *left; \ 1.342 + outer_sum += *right++; \ 1.343 + *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \ 1.344 + dptr += dst_x_stride; \ 1.345 + outer_sum -= *left++; 1.346 + 1.347 +#ifdef UNROLL_SEPARABLE_LOOPS 1.348 + for (; x < width - 16; x += 16) { 1.349 + CENTER_ITER 1.350 + CENTER_ITER 1.351 + CENTER_ITER 1.352 + CENTER_ITER 1.353 + CENTER_ITER 1.354 + CENTER_ITER 1.355 + CENTER_ITER 1.356 + CENTER_ITER 1.357 + CENTER_ITER 1.358 + CENTER_ITER 1.359 + CENTER_ITER 1.360 + CENTER_ITER 1.361 + CENTER_ITER 1.362 + CENTER_ITER 1.363 + CENTER_ITER 1.364 + CENTER_ITER 1.365 + } 1.366 +#endif 1.367 + for (; x < width; ++x) { 1.368 + CENTER_ITER 1.369 + } 1.370 +#undef CENTER_ITER 1.371 + 1.372 + #define RIGHT_BORDER_ITER \ 1.373 + inner_sum = outer_sum - *left++; \ 1.374 + *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \ 1.375 + dptr += dst_x_stride; \ 1.376 + outer_sum = inner_sum; 1.377 + 1.378 + x = 0; 1.379 +#ifdef UNROLL_SEPARABLE_LOOPS 1.380 + for (; x < border - 16; x += 16) { 1.381 + RIGHT_BORDER_ITER 1.382 + RIGHT_BORDER_ITER 1.383 + RIGHT_BORDER_ITER 1.384 + RIGHT_BORDER_ITER 1.385 + RIGHT_BORDER_ITER 1.386 + RIGHT_BORDER_ITER 1.387 + RIGHT_BORDER_ITER 1.388 + RIGHT_BORDER_ITER 1.389 + RIGHT_BORDER_ITER 1.390 + RIGHT_BORDER_ITER 1.391 + RIGHT_BORDER_ITER 1.392 + RIGHT_BORDER_ITER 1.393 + RIGHT_BORDER_ITER 1.394 + RIGHT_BORDER_ITER 1.395 + RIGHT_BORDER_ITER 1.396 + RIGHT_BORDER_ITER 1.397 + } 1.398 +#endif 1.399 + for (; x < border; ++x) { 1.400 + RIGHT_BORDER_ITER 1.401 + } 1.402 +#undef RIGHT_BORDER_ITER 1.403 + SkASSERT(outer_sum == 0 && inner_sum == 0); 1.404 + } 1.405 + return new_width; 1.406 +} 1.407 + 1.408 +static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius) 1.409 +{ 1.410 + *loRadius = *hiRadius = SkScalarCeilToInt(passRadius); 1.411 + if (SkIntToScalar(*hiRadius) - passRadius > 0.5f) { 1.412 + *loRadius = *hiRadius - 1; 1.413 + } 1.414 +} 1.415 + 1.416 +#include "SkColorPriv.h" 1.417 + 1.418 +static void merge_src_with_blur(uint8_t dst[], int dstRB, 1.419 + const uint8_t src[], int srcRB, 1.420 + const uint8_t blur[], int blurRB, 1.421 + int sw, int sh) { 1.422 + dstRB -= sw; 1.423 + srcRB -= sw; 1.424 + blurRB -= sw; 1.425 + while (--sh >= 0) { 1.426 + for (int x = sw - 1; x >= 0; --x) { 1.427 + *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src))); 1.428 + dst += 1; 1.429 + src += 1; 1.430 + blur += 1; 1.431 + } 1.432 + dst += dstRB; 1.433 + src += srcRB; 1.434 + blur += blurRB; 1.435 + } 1.436 +} 1.437 + 1.438 +static void clamp_with_orig(uint8_t dst[], int dstRowBytes, 1.439 + const uint8_t src[], int srcRowBytes, 1.440 + int sw, int sh, 1.441 + SkBlurMask::Style style) { 1.442 + int x; 1.443 + while (--sh >= 0) { 1.444 + switch (style) { 1.445 + case SkBlurMask::kSolid_Style: 1.446 + for (x = sw - 1; x >= 0; --x) { 1.447 + int s = *src; 1.448 + int d = *dst; 1.449 + *dst = SkToU8(s + d - SkMulDiv255Round(s, d)); 1.450 + dst += 1; 1.451 + src += 1; 1.452 + } 1.453 + break; 1.454 + case SkBlurMask::kOuter_Style: 1.455 + for (x = sw - 1; x >= 0; --x) { 1.456 + if (*src) { 1.457 + *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src))); 1.458 + } 1.459 + dst += 1; 1.460 + src += 1; 1.461 + } 1.462 + break; 1.463 + default: 1.464 + SkDEBUGFAIL("Unexpected blur style here"); 1.465 + break; 1.466 + } 1.467 + dst += dstRowBytes - sw; 1.468 + src += srcRowBytes - sw; 1.469 + } 1.470 +} 1.471 + 1.472 +/////////////////////////////////////////////////////////////////////////////// 1.473 + 1.474 +// we use a local function to wrap the class static method to work around 1.475 +// a bug in gcc98 1.476 +void SkMask_FreeImage(uint8_t* image); 1.477 +void SkMask_FreeImage(uint8_t* image) { 1.478 + SkMask::FreeImage(image); 1.479 +} 1.480 + 1.481 +bool SkBlurMask::BoxBlur(SkMask* dst, const SkMask& src, 1.482 + SkScalar sigma, Style style, Quality quality, 1.483 + SkIPoint* margin) { 1.484 + 1.485 + if (src.fFormat != SkMask::kA8_Format) { 1.486 + return false; 1.487 + } 1.488 + 1.489 + // Force high quality off for small radii (performance) 1.490 + if (sigma <= SkIntToScalar(2)) { 1.491 + quality = kLow_Quality; 1.492 + } 1.493 + 1.494 + SkScalar passRadius; 1.495 + if (kHigh_Quality == quality) { 1.496 + // For the high quality path the 3 pass box blur kernel width is 1.497 + // 6*rad+1 while the full Gaussian width is 6*sigma. 1.498 + passRadius = sigma - (1/6.0f); 1.499 + } else { 1.500 + // For the low quality path we only attempt to cover 3*sigma of the 1.501 + // Gaussian blur area (1.5*sigma on each side). The single pass box 1.502 + // blur's kernel size is 2*rad+1. 1.503 + passRadius = 1.5f*sigma - 0.5f; 1.504 + } 1.505 + 1.506 + // highQuality: use three box blur passes as a cheap way 1.507 + // to approximate a Gaussian blur 1.508 + int passCount = (kHigh_Quality == quality) ? 3 : 1; 1.509 + 1.510 + int rx = SkScalarCeilToInt(passRadius); 1.511 + int outerWeight = 255 - SkScalarRoundToInt((SkIntToScalar(rx) - passRadius) * 255); 1.512 + 1.513 + SkASSERT(rx >= 0); 1.514 + SkASSERT((unsigned)outerWeight <= 255); 1.515 + if (rx <= 0) { 1.516 + return false; 1.517 + } 1.518 + 1.519 + int ry = rx; // only do square blur for now 1.520 + 1.521 + int padx = passCount * rx; 1.522 + int pady = passCount * ry; 1.523 + 1.524 + if (margin) { 1.525 + margin->set(padx, pady); 1.526 + } 1.527 + dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady, 1.528 + src.fBounds.fRight + padx, src.fBounds.fBottom + pady); 1.529 + 1.530 + dst->fRowBytes = dst->fBounds.width(); 1.531 + dst->fFormat = SkMask::kA8_Format; 1.532 + dst->fImage = NULL; 1.533 + 1.534 + if (src.fImage) { 1.535 + size_t dstSize = dst->computeImageSize(); 1.536 + if (0 == dstSize) { 1.537 + return false; // too big to allocate, abort 1.538 + } 1.539 + 1.540 + int sw = src.fBounds.width(); 1.541 + int sh = src.fBounds.height(); 1.542 + const uint8_t* sp = src.fImage; 1.543 + uint8_t* dp = SkMask::AllocImage(dstSize); 1.544 + SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp); 1.545 + 1.546 + // build the blurry destination 1.547 + SkAutoTMalloc<uint8_t> tmpBuffer(dstSize); 1.548 + uint8_t* tp = tmpBuffer.get(); 1.549 + int w = sw, h = sh; 1.550 + 1.551 + if (outerWeight == 255) { 1.552 + int loRadius, hiRadius; 1.553 + get_adjusted_radii(passRadius, &loRadius, &hiRadius); 1.554 + if (kHigh_Quality == quality) { 1.555 + // Do three X blurs, with a transpose on the final one. 1.556 + w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false); 1.557 + w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false); 1.558 + w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true); 1.559 + // Do three Y blurs, with a transpose on the final one. 1.560 + h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false); 1.561 + h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false); 1.562 + h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true); 1.563 + } else { 1.564 + w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true); 1.565 + h = boxBlur(tp, h, dp, ry, ry, h, w, true); 1.566 + } 1.567 + } else { 1.568 + if (kHigh_Quality == quality) { 1.569 + // Do three X blurs, with a transpose on the final one. 1.570 + w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outerWeight); 1.571 + w = boxBlurInterp(tp, w, dp, rx, w, h, false, outerWeight); 1.572 + w = boxBlurInterp(dp, w, tp, rx, w, h, true, outerWeight); 1.573 + // Do three Y blurs, with a transpose on the final one. 1.574 + h = boxBlurInterp(tp, h, dp, ry, h, w, false, outerWeight); 1.575 + h = boxBlurInterp(dp, h, tp, ry, h, w, false, outerWeight); 1.576 + h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight); 1.577 + } else { 1.578 + w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outerWeight); 1.579 + h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight); 1.580 + } 1.581 + } 1.582 + 1.583 + dst->fImage = dp; 1.584 + // if need be, alloc the "real" dst (same size as src) and copy/merge 1.585 + // the blur into it (applying the src) 1.586 + if (style == kInner_Style) { 1.587 + // now we allocate the "real" dst, mirror the size of src 1.588 + size_t srcSize = src.computeImageSize(); 1.589 + if (0 == srcSize) { 1.590 + return false; // too big to allocate, abort 1.591 + } 1.592 + dst->fImage = SkMask::AllocImage(srcSize); 1.593 + merge_src_with_blur(dst->fImage, src.fRowBytes, 1.594 + sp, src.fRowBytes, 1.595 + dp + passCount * (rx + ry * dst->fRowBytes), 1.596 + dst->fRowBytes, sw, sh); 1.597 + SkMask::FreeImage(dp); 1.598 + } else if (style != kNormal_Style) { 1.599 + clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes), 1.600 + dst->fRowBytes, sp, src.fRowBytes, sw, sh, style); 1.601 + } 1.602 + (void)autoCall.detach(); 1.603 + } 1.604 + 1.605 + if (style == kInner_Style) { 1.606 + dst->fBounds = src.fBounds; // restore trimmed bounds 1.607 + dst->fRowBytes = src.fRowBytes; 1.608 + } 1.609 + 1.610 + return true; 1.611 +} 1.612 + 1.613 +/* Convolving a box with itself three times results in a piecewise 1.614 + quadratic function: 1.615 + 1.616 + 0 x <= -1.5 1.617 + 9/8 + 3/2 x + 1/2 x^2 -1.5 < x <= -.5 1.618 + 3/4 - x^2 -.5 < x <= .5 1.619 + 9/8 - 3/2 x + 1/2 x^2 0.5 < x <= 1.5 1.620 + 0 1.5 < x 1.621 + 1.622 + Mathematica: 1.623 + 1.624 + g[x_] := Piecewise [ { 1.625 + {9/8 + 3/2 x + 1/2 x^2 , -1.5 < x <= -.5}, 1.626 + {3/4 - x^2 , -.5 < x <= .5}, 1.627 + {9/8 - 3/2 x + 1/2 x^2 , 0.5 < x <= 1.5} 1.628 + }, 0] 1.629 + 1.630 + To get the profile curve of the blurred step function at the rectangle 1.631 + edge, we evaluate the indefinite integral, which is piecewise cubic: 1.632 + 1.633 + 0 x <= -1.5 1.634 + 9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3 -1.5 < x <= -0.5 1.635 + 1/2 + 3/4 x - 1/3 x^3 -.5 < x <= .5 1.636 + 7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3 .5 < x <= 1.5 1.637 + 1 1.5 < x 1.638 + 1.639 + in Mathematica code: 1.640 + 1.641 + gi[x_] := Piecewise[ { 1.642 + { 0 , x <= -1.5 }, 1.643 + { 9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3, -1.5 < x <= -0.5 }, 1.644 + { 1/2 + 3/4 x - 1/3 x^3 , -.5 < x <= .5}, 1.645 + { 7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3, .5 < x <= 1.5} 1.646 + },1] 1.647 +*/ 1.648 + 1.649 +static float gaussianIntegral(float x) { 1.650 + if (x > 1.5f) { 1.651 + return 0.0f; 1.652 + } 1.653 + if (x < -1.5f) { 1.654 + return 1.0f; 1.655 + } 1.656 + 1.657 + float x2 = x*x; 1.658 + float x3 = x2*x; 1.659 + 1.660 + if ( x > 0.5f ) { 1.661 + return 0.5625f - (x3 / 6.0f - 3.0f * x2 * 0.25f + 1.125f * x); 1.662 + } 1.663 + if ( x > -0.5f ) { 1.664 + return 0.5f - (0.75f * x - x3 / 3.0f); 1.665 + } 1.666 + return 0.4375f + (-x3 / 6.0f - 3.0f * x2 * 0.25f - 1.125f * x); 1.667 +} 1.668 + 1.669 +/* ComputeBlurProfile allocates and fills in an array of floating 1.670 + point values between 0 and 255 for the profile signature of 1.671 + a blurred half-plane with the given blur radius. Since we're 1.672 + going to be doing screened multiplications (i.e., 1 - (1-x)(1-y)) 1.673 + all the time, we actually fill in the profile pre-inverted 1.674 + (already done 255-x). 1.675 + 1.676 + It's the responsibility of the caller to delete the 1.677 + memory returned in profile_out. 1.678 +*/ 1.679 + 1.680 +void SkBlurMask::ComputeBlurProfile(SkScalar sigma, uint8_t **profile_out) { 1.681 + int size = SkScalarCeilToInt(6*sigma); 1.682 + 1.683 + int center = size >> 1; 1.684 + uint8_t *profile = SkNEW_ARRAY(uint8_t, size); 1.685 + 1.686 + float invr = 1.f/(2*sigma); 1.687 + 1.688 + profile[0] = 255; 1.689 + for (int x = 1 ; x < size ; ++x) { 1.690 + float scaled_x = (center - x - .5f) * invr; 1.691 + float gi = gaussianIntegral(scaled_x); 1.692 + profile[x] = 255 - (uint8_t) (255.f * gi); 1.693 + } 1.694 + 1.695 + *profile_out = profile; 1.696 +} 1.697 + 1.698 +// TODO MAYBE: Maintain a profile cache to avoid recomputing this for 1.699 +// commonly used radii. Consider baking some of the most common blur radii 1.700 +// directly in as static data? 1.701 + 1.702 +// Implementation adapted from Michael Herf's approach: 1.703 +// http://stereopsis.com/shadowrect/ 1.704 + 1.705 +uint8_t SkBlurMask::ProfileLookup(const uint8_t *profile, int loc, int blurred_width, int sharp_width) { 1.706 + int dx = SkAbs32(((loc << 1) + 1) - blurred_width) - sharp_width; // how far are we from the original edge? 1.707 + int ox = dx >> 1; 1.708 + if (ox < 0) { 1.709 + ox = 0; 1.710 + } 1.711 + 1.712 + return profile[ox]; 1.713 +} 1.714 + 1.715 +void SkBlurMask::ComputeBlurredScanline(uint8_t *pixels, const uint8_t *profile, 1.716 + unsigned int width, SkScalar sigma) { 1.717 + 1.718 + unsigned int profile_size = SkScalarCeilToInt(6*sigma); 1.719 + SkAutoTMalloc<uint8_t> horizontalScanline(width); 1.720 + 1.721 + unsigned int sw = width - profile_size; 1.722 + // nearest odd number less than the profile size represents the center 1.723 + // of the (2x scaled) profile 1.724 + int center = ( profile_size & ~1 ) - 1; 1.725 + 1.726 + int w = sw - center; 1.727 + 1.728 + for (unsigned int x = 0 ; x < width ; ++x) { 1.729 + if (profile_size <= sw) { 1.730 + pixels[x] = ProfileLookup(profile, x, width, w); 1.731 + } else { 1.732 + float span = float(sw)/(2*sigma); 1.733 + float giX = 1.5f - (x+.5f)/(2*sigma); 1.734 + pixels[x] = (uint8_t) (255 * (gaussianIntegral(giX) - gaussianIntegral(giX + span))); 1.735 + } 1.736 + } 1.737 +} 1.738 + 1.739 +bool SkBlurMask::BlurRect(SkScalar sigma, SkMask *dst, 1.740 + const SkRect &src, Style style, 1.741 + SkIPoint *margin, SkMask::CreateMode createMode) { 1.742 + int profile_size = SkScalarCeilToInt(6*sigma); 1.743 + 1.744 + int pad = profile_size/2; 1.745 + if (margin) { 1.746 + margin->set( pad, pad ); 1.747 + } 1.748 + 1.749 + dst->fBounds.set(SkScalarRoundToInt(src.fLeft - pad), 1.750 + SkScalarRoundToInt(src.fTop - pad), 1.751 + SkScalarRoundToInt(src.fRight + pad), 1.752 + SkScalarRoundToInt(src.fBottom + pad)); 1.753 + 1.754 + dst->fRowBytes = dst->fBounds.width(); 1.755 + dst->fFormat = SkMask::kA8_Format; 1.756 + dst->fImage = NULL; 1.757 + 1.758 + int sw = SkScalarFloorToInt(src.width()); 1.759 + int sh = SkScalarFloorToInt(src.height()); 1.760 + 1.761 + if (createMode == SkMask::kJustComputeBounds_CreateMode) { 1.762 + if (style == kInner_Style) { 1.763 + dst->fBounds.set(SkScalarRoundToInt(src.fLeft), 1.764 + SkScalarRoundToInt(src.fTop), 1.765 + SkScalarRoundToInt(src.fRight), 1.766 + SkScalarRoundToInt(src.fBottom)); // restore trimmed bounds 1.767 + dst->fRowBytes = sw; 1.768 + } 1.769 + return true; 1.770 + } 1.771 + uint8_t *profile = NULL; 1.772 + 1.773 + ComputeBlurProfile(sigma, &profile); 1.774 + SkAutoTDeleteArray<uint8_t> ada(profile); 1.775 + 1.776 + size_t dstSize = dst->computeImageSize(); 1.777 + if (0 == dstSize) { 1.778 + return false; // too big to allocate, abort 1.779 + } 1.780 + 1.781 + uint8_t* dp = SkMask::AllocImage(dstSize); 1.782 + 1.783 + dst->fImage = dp; 1.784 + 1.785 + int dstHeight = dst->fBounds.height(); 1.786 + int dstWidth = dst->fBounds.width(); 1.787 + 1.788 + uint8_t *outptr = dp; 1.789 + 1.790 + SkAutoTMalloc<uint8_t> horizontalScanline(dstWidth); 1.791 + SkAutoTMalloc<uint8_t> verticalScanline(dstHeight); 1.792 + 1.793 + ComputeBlurredScanline(horizontalScanline, profile, dstWidth, sigma); 1.794 + ComputeBlurredScanline(verticalScanline, profile, dstHeight, sigma); 1.795 + 1.796 + for (int y = 0 ; y < dstHeight ; ++y) { 1.797 + for (int x = 0 ; x < dstWidth ; x++) { 1.798 + unsigned int maskval = SkMulDiv255Round(horizontalScanline[x], verticalScanline[y]); 1.799 + *(outptr++) = maskval; 1.800 + } 1.801 + } 1.802 + 1.803 + if (style == kInner_Style) { 1.804 + // now we allocate the "real" dst, mirror the size of src 1.805 + size_t srcSize = (size_t)(src.width() * src.height()); 1.806 + if (0 == srcSize) { 1.807 + return false; // too big to allocate, abort 1.808 + } 1.809 + dst->fImage = SkMask::AllocImage(srcSize); 1.810 + for (int y = 0 ; y < sh ; y++) { 1.811 + uint8_t *blur_scanline = dp + (y+pad)*dstWidth + pad; 1.812 + uint8_t *inner_scanline = dst->fImage + y*sw; 1.813 + memcpy(inner_scanline, blur_scanline, sw); 1.814 + } 1.815 + SkMask::FreeImage(dp); 1.816 + 1.817 + dst->fBounds.set(SkScalarRoundToInt(src.fLeft), 1.818 + SkScalarRoundToInt(src.fTop), 1.819 + SkScalarRoundToInt(src.fRight), 1.820 + SkScalarRoundToInt(src.fBottom)); // restore trimmed bounds 1.821 + dst->fRowBytes = sw; 1.822 + 1.823 + } else if (style == kOuter_Style) { 1.824 + for (int y = pad ; y < dstHeight-pad ; y++) { 1.825 + uint8_t *dst_scanline = dp + y*dstWidth + pad; 1.826 + memset(dst_scanline, 0, sw); 1.827 + } 1.828 + } else if (style == kSolid_Style) { 1.829 + for (int y = pad ; y < dstHeight-pad ; y++) { 1.830 + uint8_t *dst_scanline = dp + y*dstWidth + pad; 1.831 + memset(dst_scanline, 0xff, sw); 1.832 + } 1.833 + } 1.834 + // normal and solid styles are the same for analytic rect blurs, so don't 1.835 + // need to handle solid specially. 1.836 + 1.837 + return true; 1.838 +} 1.839 + 1.840 +bool SkBlurMask::BlurRRect(SkScalar sigma, SkMask *dst, 1.841 + const SkRRect &src, Style style, 1.842 + SkIPoint *margin, SkMask::CreateMode createMode) { 1.843 + // Temporary for now -- always fail, should cause caller to fall back 1.844 + // to old path. Plumbing just to land API and parallelize effort. 1.845 + 1.846 + return false; 1.847 +} 1.848 + 1.849 +// The "simple" blur is a direct implementation of separable convolution with a discrete 1.850 +// gaussian kernel. It's "ground truth" in a sense; too slow to be used, but very 1.851 +// useful for correctness comparisons. 1.852 + 1.853 +bool SkBlurMask::BlurGroundTruth(SkScalar sigma, SkMask* dst, const SkMask& src, 1.854 + Style style, SkIPoint* margin) { 1.855 + 1.856 + if (src.fFormat != SkMask::kA8_Format) { 1.857 + return false; 1.858 + } 1.859 + 1.860 + float variance = sigma * sigma; 1.861 + 1.862 + int windowSize = SkScalarCeilToInt(sigma*6); 1.863 + // round window size up to nearest odd number 1.864 + windowSize |= 1; 1.865 + 1.866 + SkAutoTMalloc<float> gaussWindow(windowSize); 1.867 + 1.868 + int halfWindow = windowSize >> 1; 1.869 + 1.870 + gaussWindow[halfWindow] = 1; 1.871 + 1.872 + float windowSum = 1; 1.873 + for (int x = 1 ; x <= halfWindow ; ++x) { 1.874 + float gaussian = expf(-x*x / (2*variance)); 1.875 + gaussWindow[halfWindow + x] = gaussWindow[halfWindow-x] = gaussian; 1.876 + windowSum += 2*gaussian; 1.877 + } 1.878 + 1.879 + // leave the filter un-normalized for now; we will divide by the normalization 1.880 + // sum later; 1.881 + 1.882 + int pad = halfWindow; 1.883 + if (margin) { 1.884 + margin->set( pad, pad ); 1.885 + } 1.886 + 1.887 + dst->fBounds = src.fBounds; 1.888 + dst->fBounds.outset(pad, pad); 1.889 + 1.890 + dst->fRowBytes = dst->fBounds.width(); 1.891 + dst->fFormat = SkMask::kA8_Format; 1.892 + dst->fImage = NULL; 1.893 + 1.894 + if (src.fImage) { 1.895 + 1.896 + size_t dstSize = dst->computeImageSize(); 1.897 + if (0 == dstSize) { 1.898 + return false; // too big to allocate, abort 1.899 + } 1.900 + 1.901 + int srcWidth = src.fBounds.width(); 1.902 + int srcHeight = src.fBounds.height(); 1.903 + int dstWidth = dst->fBounds.width(); 1.904 + 1.905 + const uint8_t* srcPixels = src.fImage; 1.906 + uint8_t* dstPixels = SkMask::AllocImage(dstSize); 1.907 + SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dstPixels); 1.908 + 1.909 + // do the actual blur. First, make a padded copy of the source. 1.910 + // use double pad so we never have to check if we're outside anything 1.911 + 1.912 + int padWidth = srcWidth + 4*pad; 1.913 + int padHeight = srcHeight; 1.914 + int padSize = padWidth * padHeight; 1.915 + 1.916 + SkAutoTMalloc<uint8_t> padPixels(padSize); 1.917 + memset(padPixels, 0, padSize); 1.918 + 1.919 + for (int y = 0 ; y < srcHeight; ++y) { 1.920 + uint8_t* padptr = padPixels + y * padWidth + 2*pad; 1.921 + const uint8_t* srcptr = srcPixels + y * srcWidth; 1.922 + memcpy(padptr, srcptr, srcWidth); 1.923 + } 1.924 + 1.925 + // blur in X, transposing the result into a temporary floating point buffer. 1.926 + // also double-pad the intermediate result so that the second blur doesn't 1.927 + // have to do extra conditionals. 1.928 + 1.929 + int tmpWidth = padHeight + 4*pad; 1.930 + int tmpHeight = padWidth - 2*pad; 1.931 + int tmpSize = tmpWidth * tmpHeight; 1.932 + 1.933 + SkAutoTMalloc<float> tmpImage(tmpSize); 1.934 + memset(tmpImage, 0, tmpSize*sizeof(tmpImage[0])); 1.935 + 1.936 + for (int y = 0 ; y < padHeight ; ++y) { 1.937 + uint8_t *srcScanline = padPixels + y*padWidth; 1.938 + for (int x = pad ; x < padWidth - pad ; ++x) { 1.939 + float *outPixel = tmpImage + (x-pad)*tmpWidth + y + 2*pad; // transposed output 1.940 + uint8_t *windowCenter = srcScanline + x; 1.941 + for (int i = -pad ; i <= pad ; ++i) { 1.942 + *outPixel += gaussWindow[pad+i]*windowCenter[i]; 1.943 + } 1.944 + *outPixel /= windowSum; 1.945 + } 1.946 + } 1.947 + 1.948 + // blur in Y; now filling in the actual desired destination. We have to do 1.949 + // the transpose again; these transposes guarantee that we read memory in 1.950 + // linear order. 1.951 + 1.952 + for (int y = 0 ; y < tmpHeight ; ++y) { 1.953 + float *srcScanline = tmpImage + y*tmpWidth; 1.954 + for (int x = pad ; x < tmpWidth - pad ; ++x) { 1.955 + float *windowCenter = srcScanline + x; 1.956 + float finalValue = 0; 1.957 + for (int i = -pad ; i <= pad ; ++i) { 1.958 + finalValue += gaussWindow[pad+i]*windowCenter[i]; 1.959 + } 1.960 + finalValue /= windowSum; 1.961 + uint8_t *outPixel = dstPixels + (x-pad)*dstWidth + y; // transposed output 1.962 + int integerPixel = int(finalValue + 0.5f); 1.963 + *outPixel = SkClampMax( SkClampPos(integerPixel), 255 ); 1.964 + } 1.965 + } 1.966 + 1.967 + dst->fImage = dstPixels; 1.968 + // if need be, alloc the "real" dst (same size as src) and copy/merge 1.969 + // the blur into it (applying the src) 1.970 + if (style == kInner_Style) { 1.971 + // now we allocate the "real" dst, mirror the size of src 1.972 + size_t srcSize = src.computeImageSize(); 1.973 + if (0 == srcSize) { 1.974 + return false; // too big to allocate, abort 1.975 + } 1.976 + dst->fImage = SkMask::AllocImage(srcSize); 1.977 + merge_src_with_blur(dst->fImage, src.fRowBytes, 1.978 + srcPixels, src.fRowBytes, 1.979 + dstPixels + pad*dst->fRowBytes + pad, 1.980 + dst->fRowBytes, srcWidth, srcHeight); 1.981 + SkMask::FreeImage(dstPixels); 1.982 + } else if (style != kNormal_Style) { 1.983 + clamp_with_orig(dstPixels + pad*dst->fRowBytes + pad, 1.984 + dst->fRowBytes, srcPixels, src.fRowBytes, srcWidth, srcHeight, style); 1.985 + } 1.986 + (void)autoCall.detach(); 1.987 + } 1.988 + 1.989 + if (style == kInner_Style) { 1.990 + dst->fBounds = src.fBounds; // restore trimmed bounds 1.991 + dst->fRowBytes = src.fRowBytes; 1.992 + } 1.993 + 1.994 + return true; 1.995 +}