1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/gfx/skia/trunk/src/core/SkConvolver.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,461 @@ 1.4 +// Copyright (c) 2011 The Chromium Authors. All rights reserved. 1.5 +// Use of this source code is governed by a BSD-style license that can be 1.6 +// found in the LICENSE file. 1.7 + 1.8 +#include "SkConvolver.h" 1.9 +#include "SkSize.h" 1.10 +#include "SkTypes.h" 1.11 + 1.12 +namespace { 1.13 + 1.14 + // Converts the argument to an 8-bit unsigned value by clamping to the range 1.15 + // 0-255. 1.16 + inline unsigned char ClampTo8(int a) { 1.17 + if (static_cast<unsigned>(a) < 256) { 1.18 + return a; // Avoid the extra check in the common case. 1.19 + } 1.20 + if (a < 0) { 1.21 + return 0; 1.22 + } 1.23 + return 255; 1.24 + } 1.25 + 1.26 + // Stores a list of rows in a circular buffer. The usage is you write into it 1.27 + // by calling AdvanceRow. It will keep track of which row in the buffer it 1.28 + // should use next, and the total number of rows added. 1.29 + class CircularRowBuffer { 1.30 + public: 1.31 + // The number of pixels in each row is given in |sourceRowPixelWidth|. 1.32 + // The maximum number of rows needed in the buffer is |maxYFilterSize| 1.33 + // (we only need to store enough rows for the biggest filter). 1.34 + // 1.35 + // We use the |firstInputRow| to compute the coordinates of all of the 1.36 + // following rows returned by Advance(). 1.37 + CircularRowBuffer(int destRowPixelWidth, int maxYFilterSize, 1.38 + int firstInputRow) 1.39 + : fRowByteWidth(destRowPixelWidth * 4), 1.40 + fNumRows(maxYFilterSize), 1.41 + fNextRow(0), 1.42 + fNextRowCoordinate(firstInputRow) { 1.43 + fBuffer.reset(fRowByteWidth * maxYFilterSize); 1.44 + fRowAddresses.reset(fNumRows); 1.45 + } 1.46 + 1.47 + // Moves to the next row in the buffer, returning a pointer to the beginning 1.48 + // of it. 1.49 + unsigned char* advanceRow() { 1.50 + unsigned char* row = &fBuffer[fNextRow * fRowByteWidth]; 1.51 + fNextRowCoordinate++; 1.52 + 1.53 + // Set the pointer to the next row to use, wrapping around if necessary. 1.54 + fNextRow++; 1.55 + if (fNextRow == fNumRows) { 1.56 + fNextRow = 0; 1.57 + } 1.58 + return row; 1.59 + } 1.60 + 1.61 + // Returns a pointer to an "unrolled" array of rows. These rows will start 1.62 + // at the y coordinate placed into |*firstRowIndex| and will continue in 1.63 + // order for the maximum number of rows in this circular buffer. 1.64 + // 1.65 + // The |firstRowIndex_| may be negative. This means the circular buffer 1.66 + // starts before the top of the image (it hasn't been filled yet). 1.67 + unsigned char* const* GetRowAddresses(int* firstRowIndex) { 1.68 + // Example for a 4-element circular buffer holding coords 6-9. 1.69 + // Row 0 Coord 8 1.70 + // Row 1 Coord 9 1.71 + // Row 2 Coord 6 <- fNextRow = 2, fNextRowCoordinate = 10. 1.72 + // Row 3 Coord 7 1.73 + // 1.74 + // The "next" row is also the first (lowest) coordinate. This computation 1.75 + // may yield a negative value, but that's OK, the math will work out 1.76 + // since the user of this buffer will compute the offset relative 1.77 + // to the firstRowIndex and the negative rows will never be used. 1.78 + *firstRowIndex = fNextRowCoordinate - fNumRows; 1.79 + 1.80 + int curRow = fNextRow; 1.81 + for (int i = 0; i < fNumRows; i++) { 1.82 + fRowAddresses[i] = &fBuffer[curRow * fRowByteWidth]; 1.83 + 1.84 + // Advance to the next row, wrapping if necessary. 1.85 + curRow++; 1.86 + if (curRow == fNumRows) { 1.87 + curRow = 0; 1.88 + } 1.89 + } 1.90 + return &fRowAddresses[0]; 1.91 + } 1.92 + 1.93 + private: 1.94 + // The buffer storing the rows. They are packed, each one fRowByteWidth. 1.95 + SkTArray<unsigned char> fBuffer; 1.96 + 1.97 + // Number of bytes per row in the |buffer|. 1.98 + int fRowByteWidth; 1.99 + 1.100 + // The number of rows available in the buffer. 1.101 + int fNumRows; 1.102 + 1.103 + // The next row index we should write into. This wraps around as the 1.104 + // circular buffer is used. 1.105 + int fNextRow; 1.106 + 1.107 + // The y coordinate of the |fNextRow|. This is incremented each time a 1.108 + // new row is appended and does not wrap. 1.109 + int fNextRowCoordinate; 1.110 + 1.111 + // Buffer used by GetRowAddresses(). 1.112 + SkTArray<unsigned char*> fRowAddresses; 1.113 + }; 1.114 + 1.115 +// Convolves horizontally along a single row. The row data is given in 1.116 +// |srcData| and continues for the numValues() of the filter. 1.117 +template<bool hasAlpha> 1.118 + void ConvolveHorizontally(const unsigned char* srcData, 1.119 + const SkConvolutionFilter1D& filter, 1.120 + unsigned char* outRow) { 1.121 + // Loop over each pixel on this row in the output image. 1.122 + int numValues = filter.numValues(); 1.123 + for (int outX = 0; outX < numValues; outX++) { 1.124 + // Get the filter that determines the current output pixel. 1.125 + int filterOffset, filterLength; 1.126 + const SkConvolutionFilter1D::ConvolutionFixed* filterValues = 1.127 + filter.FilterForValue(outX, &filterOffset, &filterLength); 1.128 + 1.129 + // Compute the first pixel in this row that the filter affects. It will 1.130 + // touch |filterLength| pixels (4 bytes each) after this. 1.131 + const unsigned char* rowToFilter = &srcData[filterOffset * 4]; 1.132 + 1.133 + // Apply the filter to the row to get the destination pixel in |accum|. 1.134 + int accum[4] = {0}; 1.135 + for (int filterX = 0; filterX < filterLength; filterX++) { 1.136 + SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterX]; 1.137 + accum[0] += curFilter * rowToFilter[filterX * 4 + 0]; 1.138 + accum[1] += curFilter * rowToFilter[filterX * 4 + 1]; 1.139 + accum[2] += curFilter * rowToFilter[filterX * 4 + 2]; 1.140 + if (hasAlpha) { 1.141 + accum[3] += curFilter * rowToFilter[filterX * 4 + 3]; 1.142 + } 1.143 + } 1.144 + 1.145 + // Bring this value back in range. All of the filter scaling factors 1.146 + // are in fixed point with kShiftBits bits of fractional part. 1.147 + accum[0] >>= SkConvolutionFilter1D::kShiftBits; 1.148 + accum[1] >>= SkConvolutionFilter1D::kShiftBits; 1.149 + accum[2] >>= SkConvolutionFilter1D::kShiftBits; 1.150 + if (hasAlpha) { 1.151 + accum[3] >>= SkConvolutionFilter1D::kShiftBits; 1.152 + } 1.153 + 1.154 + // Store the new pixel. 1.155 + outRow[outX * 4 + 0] = ClampTo8(accum[0]); 1.156 + outRow[outX * 4 + 1] = ClampTo8(accum[1]); 1.157 + outRow[outX * 4 + 2] = ClampTo8(accum[2]); 1.158 + if (hasAlpha) { 1.159 + outRow[outX * 4 + 3] = ClampTo8(accum[3]); 1.160 + } 1.161 + } 1.162 + } 1.163 + 1.164 +// Does vertical convolution to produce one output row. The filter values and 1.165 +// length are given in the first two parameters. These are applied to each 1.166 +// of the rows pointed to in the |sourceDataRows| array, with each row 1.167 +// being |pixelWidth| wide. 1.168 +// 1.169 +// The output must have room for |pixelWidth * 4| bytes. 1.170 +template<bool hasAlpha> 1.171 + void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues, 1.172 + int filterLength, 1.173 + unsigned char* const* sourceDataRows, 1.174 + int pixelWidth, 1.175 + unsigned char* outRow) { 1.176 + // We go through each column in the output and do a vertical convolution, 1.177 + // generating one output pixel each time. 1.178 + for (int outX = 0; outX < pixelWidth; outX++) { 1.179 + // Compute the number of bytes over in each row that the current column 1.180 + // we're convolving starts at. The pixel will cover the next 4 bytes. 1.181 + int byteOffset = outX * 4; 1.182 + 1.183 + // Apply the filter to one column of pixels. 1.184 + int accum[4] = {0}; 1.185 + for (int filterY = 0; filterY < filterLength; filterY++) { 1.186 + SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterY]; 1.187 + accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0]; 1.188 + accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1]; 1.189 + accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2]; 1.190 + if (hasAlpha) { 1.191 + accum[3] += curFilter * sourceDataRows[filterY][byteOffset + 3]; 1.192 + } 1.193 + } 1.194 + 1.195 + // Bring this value back in range. All of the filter scaling factors 1.196 + // are in fixed point with kShiftBits bits of precision. 1.197 + accum[0] >>= SkConvolutionFilter1D::kShiftBits; 1.198 + accum[1] >>= SkConvolutionFilter1D::kShiftBits; 1.199 + accum[2] >>= SkConvolutionFilter1D::kShiftBits; 1.200 + if (hasAlpha) { 1.201 + accum[3] >>= SkConvolutionFilter1D::kShiftBits; 1.202 + } 1.203 + 1.204 + // Store the new pixel. 1.205 + outRow[byteOffset + 0] = ClampTo8(accum[0]); 1.206 + outRow[byteOffset + 1] = ClampTo8(accum[1]); 1.207 + outRow[byteOffset + 2] = ClampTo8(accum[2]); 1.208 + if (hasAlpha) { 1.209 + unsigned char alpha = ClampTo8(accum[3]); 1.210 + 1.211 + // Make sure the alpha channel doesn't come out smaller than any of the 1.212 + // color channels. We use premultipled alpha channels, so this should 1.213 + // never happen, but rounding errors will cause this from time to time. 1.214 + // These "impossible" colors will cause overflows (and hence random pixel 1.215 + // values) when the resulting bitmap is drawn to the screen. 1.216 + // 1.217 + // We only need to do this when generating the final output row (here). 1.218 + int maxColorChannel = SkTMax(outRow[byteOffset + 0], 1.219 + SkTMax(outRow[byteOffset + 1], 1.220 + outRow[byteOffset + 2])); 1.221 + if (alpha < maxColorChannel) { 1.222 + outRow[byteOffset + 3] = maxColorChannel; 1.223 + } else { 1.224 + outRow[byteOffset + 3] = alpha; 1.225 + } 1.226 + } else { 1.227 + // No alpha channel, the image is opaque. 1.228 + outRow[byteOffset + 3] = 0xff; 1.229 + } 1.230 + } 1.231 + } 1.232 + 1.233 + void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues, 1.234 + int filterLength, 1.235 + unsigned char* const* sourceDataRows, 1.236 + int pixelWidth, 1.237 + unsigned char* outRow, 1.238 + bool sourceHasAlpha) { 1.239 + if (sourceHasAlpha) { 1.240 + ConvolveVertically<true>(filterValues, filterLength, 1.241 + sourceDataRows, pixelWidth, 1.242 + outRow); 1.243 + } else { 1.244 + ConvolveVertically<false>(filterValues, filterLength, 1.245 + sourceDataRows, pixelWidth, 1.246 + outRow); 1.247 + } 1.248 + } 1.249 + 1.250 +} // namespace 1.251 + 1.252 +// SkConvolutionFilter1D --------------------------------------------------------- 1.253 + 1.254 +SkConvolutionFilter1D::SkConvolutionFilter1D() 1.255 +: fMaxFilter(0) { 1.256 +} 1.257 + 1.258 +SkConvolutionFilter1D::~SkConvolutionFilter1D() { 1.259 +} 1.260 + 1.261 +void SkConvolutionFilter1D::AddFilter(int filterOffset, 1.262 + const float* filterValues, 1.263 + int filterLength) { 1.264 + SkASSERT(filterLength > 0); 1.265 + 1.266 + SkTArray<ConvolutionFixed> fixedValues; 1.267 + fixedValues.reset(filterLength); 1.268 + 1.269 + for (int i = 0; i < filterLength; ++i) { 1.270 + fixedValues.push_back(FloatToFixed(filterValues[i])); 1.271 + } 1.272 + 1.273 + AddFilter(filterOffset, &fixedValues[0], filterLength); 1.274 +} 1.275 + 1.276 +void SkConvolutionFilter1D::AddFilter(int filterOffset, 1.277 + const ConvolutionFixed* filterValues, 1.278 + int filterLength) { 1.279 + // It is common for leading/trailing filter values to be zeros. In such 1.280 + // cases it is beneficial to only store the central factors. 1.281 + // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on 1.282 + // a 1080p image this optimization gives a ~10% speed improvement. 1.283 + int filterSize = filterLength; 1.284 + int firstNonZero = 0; 1.285 + while (firstNonZero < filterLength && filterValues[firstNonZero] == 0) { 1.286 + firstNonZero++; 1.287 + } 1.288 + 1.289 + if (firstNonZero < filterLength) { 1.290 + // Here we have at least one non-zero factor. 1.291 + int lastNonZero = filterLength - 1; 1.292 + while (lastNonZero >= 0 && filterValues[lastNonZero] == 0) { 1.293 + lastNonZero--; 1.294 + } 1.295 + 1.296 + filterOffset += firstNonZero; 1.297 + filterLength = lastNonZero + 1 - firstNonZero; 1.298 + SkASSERT(filterLength > 0); 1.299 + 1.300 + for (int i = firstNonZero; i <= lastNonZero; i++) { 1.301 + fFilterValues.push_back(filterValues[i]); 1.302 + } 1.303 + } else { 1.304 + // Here all the factors were zeroes. 1.305 + filterLength = 0; 1.306 + } 1.307 + 1.308 + FilterInstance instance; 1.309 + 1.310 + // We pushed filterLength elements onto fFilterValues 1.311 + instance.fDataLocation = (static_cast<int>(fFilterValues.count()) - 1.312 + filterLength); 1.313 + instance.fOffset = filterOffset; 1.314 + instance.fTrimmedLength = filterLength; 1.315 + instance.fLength = filterSize; 1.316 + fFilters.push_back(instance); 1.317 + 1.318 + fMaxFilter = SkTMax(fMaxFilter, filterLength); 1.319 +} 1.320 + 1.321 +const SkConvolutionFilter1D::ConvolutionFixed* SkConvolutionFilter1D::GetSingleFilter( 1.322 + int* specifiedFilterlength, 1.323 + int* filterOffset, 1.324 + int* filterLength) const { 1.325 + const FilterInstance& filter = fFilters[0]; 1.326 + *filterOffset = filter.fOffset; 1.327 + *filterLength = filter.fTrimmedLength; 1.328 + *specifiedFilterlength = filter.fLength; 1.329 + if (filter.fTrimmedLength == 0) { 1.330 + return NULL; 1.331 + } 1.332 + 1.333 + return &fFilterValues[filter.fDataLocation]; 1.334 +} 1.335 + 1.336 +void BGRAConvolve2D(const unsigned char* sourceData, 1.337 + int sourceByteRowStride, 1.338 + bool sourceHasAlpha, 1.339 + const SkConvolutionFilter1D& filterX, 1.340 + const SkConvolutionFilter1D& filterY, 1.341 + int outputByteRowStride, 1.342 + unsigned char* output, 1.343 + const SkConvolutionProcs& convolveProcs, 1.344 + bool useSimdIfPossible) { 1.345 + 1.346 + int maxYFilterSize = filterY.maxFilter(); 1.347 + 1.348 + // The next row in the input that we will generate a horizontally 1.349 + // convolved row for. If the filter doesn't start at the beginning of the 1.350 + // image (this is the case when we are only resizing a subset), then we 1.351 + // don't want to generate any output rows before that. Compute the starting 1.352 + // row for convolution as the first pixel for the first vertical filter. 1.353 + int filterOffset, filterLength; 1.354 + const SkConvolutionFilter1D::ConvolutionFixed* filterValues = 1.355 + filterY.FilterForValue(0, &filterOffset, &filterLength); 1.356 + int nextXRow = filterOffset; 1.357 + 1.358 + // We loop over each row in the input doing a horizontal convolution. This 1.359 + // will result in a horizontally convolved image. We write the results into 1.360 + // a circular buffer of convolved rows and do vertical convolution as rows 1.361 + // are available. This prevents us from having to store the entire 1.362 + // intermediate image and helps cache coherency. 1.363 + // We will need four extra rows to allow horizontal convolution could be done 1.364 + // simultaneously. We also pad each row in row buffer to be aligned-up to 1.365 + // 16 bytes. 1.366 + // TODO(jiesun): We do not use aligned load from row buffer in vertical 1.367 + // convolution pass yet. Somehow Windows does not like it. 1.368 + int rowBufferWidth = (filterX.numValues() + 15) & ~0xF; 1.369 + int rowBufferHeight = maxYFilterSize + 1.370 + (convolveProcs.fConvolve4RowsHorizontally ? 4 : 0); 1.371 + CircularRowBuffer rowBuffer(rowBufferWidth, 1.372 + rowBufferHeight, 1.373 + filterOffset); 1.374 + 1.375 + // Loop over every possible output row, processing just enough horizontal 1.376 + // convolutions to run each subsequent vertical convolution. 1.377 + SkASSERT(outputByteRowStride >= filterX.numValues() * 4); 1.378 + int numOutputRows = filterY.numValues(); 1.379 + 1.380 + // We need to check which is the last line to convolve before we advance 4 1.381 + // lines in one iteration. 1.382 + int lastFilterOffset, lastFilterLength; 1.383 + 1.384 + // SSE2 can access up to 3 extra pixels past the end of the 1.385 + // buffer. At the bottom of the image, we have to be careful 1.386 + // not to access data past the end of the buffer. Normally 1.387 + // we fall back to the C++ implementation for the last row. 1.388 + // If the last row is less than 3 pixels wide, we may have to fall 1.389 + // back to the C++ version for more rows. Compute how many 1.390 + // rows we need to avoid the SSE implementation for here. 1.391 + filterX.FilterForValue(filterX.numValues() - 1, &lastFilterOffset, 1.392 + &lastFilterLength); 1.393 + int avoidSimdRows = 1 + convolveProcs.fExtraHorizontalReads / 1.394 + (lastFilterOffset + lastFilterLength); 1.395 + 1.396 + filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, 1.397 + &lastFilterLength); 1.398 + 1.399 + for (int outY = 0; outY < numOutputRows; outY++) { 1.400 + filterValues = filterY.FilterForValue(outY, 1.401 + &filterOffset, &filterLength); 1.402 + 1.403 + // Generate output rows until we have enough to run the current filter. 1.404 + while (nextXRow < filterOffset + filterLength) { 1.405 + if (convolveProcs.fConvolve4RowsHorizontally && 1.406 + nextXRow + 3 < lastFilterOffset + lastFilterLength - 1.407 + avoidSimdRows) { 1.408 + const unsigned char* src[4]; 1.409 + unsigned char* outRow[4]; 1.410 + for (int i = 0; i < 4; ++i) { 1.411 + src[i] = &sourceData[(nextXRow + i) * sourceByteRowStride]; 1.412 + outRow[i] = rowBuffer.advanceRow(); 1.413 + } 1.414 + convolveProcs.fConvolve4RowsHorizontally(src, filterX, outRow); 1.415 + nextXRow += 4; 1.416 + } else { 1.417 + // Check if we need to avoid SSE2 for this row. 1.418 + if (convolveProcs.fConvolveHorizontally && 1.419 + nextXRow < lastFilterOffset + lastFilterLength - 1.420 + avoidSimdRows) { 1.421 + convolveProcs.fConvolveHorizontally( 1.422 + &sourceData[nextXRow * sourceByteRowStride], 1.423 + filterX, rowBuffer.advanceRow(), sourceHasAlpha); 1.424 + } else { 1.425 + if (sourceHasAlpha) { 1.426 + ConvolveHorizontally<true>( 1.427 + &sourceData[nextXRow * sourceByteRowStride], 1.428 + filterX, rowBuffer.advanceRow()); 1.429 + } else { 1.430 + ConvolveHorizontally<false>( 1.431 + &sourceData[nextXRow * sourceByteRowStride], 1.432 + filterX, rowBuffer.advanceRow()); 1.433 + } 1.434 + } 1.435 + nextXRow++; 1.436 + } 1.437 + } 1.438 + 1.439 + // Compute where in the output image this row of final data will go. 1.440 + unsigned char* curOutputRow = &output[outY * outputByteRowStride]; 1.441 + 1.442 + // Get the list of rows that the circular buffer has, in order. 1.443 + int firstRowInCircularBuffer; 1.444 + unsigned char* const* rowsToConvolve = 1.445 + rowBuffer.GetRowAddresses(&firstRowInCircularBuffer); 1.446 + 1.447 + // Now compute the start of the subset of those rows that the filter 1.448 + // needs. 1.449 + unsigned char* const* firstRowForFilter = 1.450 + &rowsToConvolve[filterOffset - firstRowInCircularBuffer]; 1.451 + 1.452 + if (convolveProcs.fConvolveVertically) { 1.453 + convolveProcs.fConvolveVertically(filterValues, filterLength, 1.454 + firstRowForFilter, 1.455 + filterX.numValues(), curOutputRow, 1.456 + sourceHasAlpha); 1.457 + } else { 1.458 + ConvolveVertically(filterValues, filterLength, 1.459 + firstRowForFilter, 1.460 + filterX.numValues(), curOutputRow, 1.461 + sourceHasAlpha); 1.462 + } 1.463 + } 1.464 +}