|
1 /* NEON optimized code (C) COPYRIGHT 2009 Motorola |
|
2 * |
|
3 * Use of this source code is governed by a BSD-style license that can be |
|
4 * found in the LICENSE file. |
|
5 */ |
|
6 |
|
7 #include "SkBitmapProcState.h" |
|
8 #include "SkPerspIter.h" |
|
9 #include "SkShader.h" |
|
10 #include "SkUtils.h" |
|
11 #include "SkUtilsArm.h" |
|
12 #include "SkBitmapProcState_utils.h" |
|
13 |
|
14 /* returns 0...(n-1) given any x (positive or negative). |
|
15 |
|
16 As an example, if n (which is always positive) is 5... |
|
17 |
|
18 x: -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 |
|
19 returns: 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 |
|
20 */ |
|
21 static inline int sk_int_mod(int x, int n) { |
|
22 SkASSERT(n > 0); |
|
23 if ((unsigned)x >= (unsigned)n) { |
|
24 if (x < 0) { |
|
25 x = n + ~(~x % n); |
|
26 } else { |
|
27 x = x % n; |
|
28 } |
|
29 } |
|
30 return x; |
|
31 } |
|
32 |
|
// Forward declarations of the decal X-coordinate generators defined later in
// this file. They are declared here, ahead of the SkBitmapProcState_matrix.h
// includes below (presumably because the CHECK_FOR_DECAL path in that header
// calls them -- confirm in the header).
//   dst   - output buffer for the generated coordinates
//   fx    - starting X value (SkFixed)
//   dx    - per-pixel X step (SkFixed)
//   count - number of coordinates to generate
void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
|
35 |
|
// Compile neon code paths if needed
#if !SK_ARM_NEON_IS_NONE

// These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp
// They are the NEON counterparts of the ClampX_ClampY_Procs and
// RepeatX_RepeatY_Procs tables; chooseMatrixProc() reaches the tables through
// SK_ARM_NEON_WRAP (presumably that macro picks the _neon variant when
// appropriate -- confirm in SkUtilsArm.h).
extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];

#endif // !SK_ARM_NEON_IS_NONE
|
44 |
|
// Compile non-neon code path if needed
//
// SkBitmapProcState_matrix.h is included once per tiling combination. Each
// include is parameterized by the macros defined just before it:
//   MAKENAME(suffix)        - builds the names of the generated symbols
//   TILEX_PROCF/TILEY_PROCF - map a fixed-point coordinate to a pixel index
//   TILEX/Y_LOW_BITS        - extract the 4-bit value used alongside the index
// The macros are re-#defined for the second include without an #undef here,
// so the header presumably #undefs them itself -- confirm in the header.
#if !SK_ARM_NEON_IS_ALWAYS
// --- Clamp in X and Y ---
#define MAKENAME(suffix) ClampX_ClampY ## suffix
// Integer part of the coordinate (>> 16), passed through SkClampMax with max
// (presumably clamping it to [0, max]).
#define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max)
#define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max)
// Bits 12..15 of the coordinate, i.e. the 4 bits just below the integer part.
#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
// Only the clamp instantiation defines CHECK_FOR_DECAL.
#define CHECK_FOR_DECAL
#include "SkBitmapProcState_matrix.h"

// --- Repeat in X and Y ---
#define MAKENAME(suffix) RepeatX_RepeatY ## suffix
// Keep only the low 16 bits of the coordinate, scale by (max + 1), then apply
// SK_USHIFT16 (presumably an unsigned >> 16 -- confirm).
#define TILEX_PROCF(fx, max) SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1))
#define TILEY_PROCF(fy, max) SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1))
// Bits 12..15 of the same scaled product.
#define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
#include "SkBitmapProcState_matrix.h"
#endif
|
62 |
|
// --- General case: tiling goes through per-state function pointers ---
// Unlike the Clamp/Repeat instantiations, this one is compiled
// unconditionally and supports any mix of tile modes in X and Y.
#define MAKENAME(suffix) GeneralXY ## suffix
// PREAMBLE copies the four tile function pointers out of the state into
// locals; the (void) casts keep unused locals from triggering warnings.
#define PREAMBLE(state) SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \
    SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \
    SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \
    SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY
// Extra parameter / argument lists that pass the X or Y pair of function
// pointers along (presumably to helper functions generated by the header).
#define PREAMBLE_PARAM_X , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX
#define PREAMBLE_PARAM_Y , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY
#define PREAMBLE_ARG_X , tileProcX, tileLowBitsProcX
#define PREAMBLE_ARG_Y , tileProcY, tileLowBitsProcY
// The tile proc result is scaled by (max + 1) and passed through SK_USHIFT16
// to form the pixel index.
#define TILEX_PROCF(fx, max) SK_USHIFT16(tileProcX(fx) * ((max) + 1))
#define TILEY_PROCF(fy, max) SK_USHIFT16(tileProcY(fy) * ((max) + 1))
// The low-bits procs receive the raw coordinate and the scale (max + 1).
#define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1)
#define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1)
#include "SkBitmapProcState_matrix.h"
|
77 |
|
78 static inline U16CPU fixed_clamp(SkFixed x) |
|
79 { |
|
80 if (x < 0) { |
|
81 x = 0; |
|
82 } |
|
83 if (x >> 16) { |
|
84 x = 0xFFFF; |
|
85 } |
|
86 return x; |
|
87 } |
|
88 |
|
89 static inline U16CPU fixed_repeat(SkFixed x) |
|
90 { |
|
91 return x & 0xFFFF; |
|
92 } |
|
93 |
|
94 // Visual Studio 2010 (MSC_VER=1600) optimizes bit-shift code incorrectly. |
|
95 // See http://code.google.com/p/skia/issues/detail?id=472 |
|
96 #if defined(_MSC_VER) && (_MSC_VER >= 1600) |
|
97 #pragma optimize("", off) |
|
98 #endif |
|
99 |
|
100 static inline U16CPU fixed_mirror(SkFixed x) |
|
101 { |
|
102 SkFixed s = x << 15 >> 31; |
|
103 // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval |
|
104 return (x ^ s) & 0xFFFF; |
|
105 } |
|
106 |
|
107 #if defined(_MSC_VER) && (_MSC_VER >= 1600) |
|
108 #pragma optimize("", on) |
|
109 #endif |
|
110 |
|
111 static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m) |
|
112 { |
|
113 if (SkShader::kClamp_TileMode == m) |
|
114 return fixed_clamp; |
|
115 if (SkShader::kRepeat_TileMode == m) |
|
116 return fixed_repeat; |
|
117 SkASSERT(SkShader::kMirror_TileMode == m); |
|
118 return fixed_mirror; |
|
119 } |
|
120 |
|
121 static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) { |
|
122 return (x >> 12) & 0xF; |
|
123 } |
|
124 |
|
125 static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) { |
|
126 return ((x * scale) >> 12) & 0xF; |
|
127 } |
|
128 |
|
129 static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) { |
|
130 if (SkShader::kClamp_TileMode == m) { |
|
131 return fixed_clamp_lowbits; |
|
132 } else { |
|
133 SkASSERT(SkShader::kMirror_TileMode == m || |
|
134 SkShader::kRepeat_TileMode == m); |
|
135 // mirror and repeat have the same behavior for the low bits. |
|
136 return fixed_repeat_or_mirrow_lowbits; |
|
137 } |
|
138 } |
|
139 |
|
140 static inline U16CPU int_clamp(int x, int n) { |
|
141 if (x >= n) { |
|
142 x = n - 1; |
|
143 } |
|
144 if (x < 0) { |
|
145 x = 0; |
|
146 } |
|
147 return x; |
|
148 } |
|
149 |
|
150 static inline U16CPU int_repeat(int x, int n) { |
|
151 return sk_int_mod(x, n); |
|
152 } |
|
153 |
|
154 static inline U16CPU int_mirror(int x, int n) { |
|
155 x = sk_int_mod(x, 2 * n); |
|
156 if (x >= n) { |
|
157 x = n + ~(x - n); |
|
158 } |
|
159 return x; |
|
160 } |
|
161 |
|
#if 0
// Debug-only helper (compiled out): prints int_mirror(x, 3) for every x in
// [-8, 8].
static void test_int_tileprocs() {
    for (int x = -8; x < 9; ++x) {
        SkDebugf(" int_mirror(%2d, 3) = %d\n", x, int_mirror(x, 3));
    }
}
#endif
|
169 |
|
170 static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) { |
|
171 if (SkShader::kClamp_TileMode == tm) |
|
172 return int_clamp; |
|
173 if (SkShader::kRepeat_TileMode == tm) |
|
174 return int_repeat; |
|
175 SkASSERT(SkShader::kMirror_TileMode == tm); |
|
176 return int_mirror; |
|
177 } |
|
178 |
|
179 ////////////////////////////////////////////////////////////////////////////// |
|
180 |
|
181 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) |
|
182 { |
|
183 int i; |
|
184 |
|
185 for (i = (count >> 2); i > 0; --i) |
|
186 { |
|
187 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16); |
|
188 fx += dx+dx; |
|
189 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16); |
|
190 fx += dx+dx; |
|
191 } |
|
192 count &= 3; |
|
193 |
|
194 uint16_t* xx = (uint16_t*)dst; |
|
195 for (i = count; i > 0; --i) { |
|
196 *xx++ = SkToU16(fx >> 16); fx += dx; |
|
197 } |
|
198 } |
|
199 |
|
200 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) |
|
201 { |
|
202 |
|
203 |
|
204 if (count & 1) |
|
205 { |
|
206 SkASSERT((fx >> (16 + 14)) == 0); |
|
207 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); |
|
208 fx += dx; |
|
209 } |
|
210 while ((count -= 2) >= 0) |
|
211 { |
|
212 SkASSERT((fx >> (16 + 14)) == 0); |
|
213 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); |
|
214 fx += dx; |
|
215 |
|
216 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); |
|
217 fx += dx; |
|
218 } |
|
219 } |
|
220 |
|
221 /////////////////////////////////////////////////////////////////////////////// |
|
// The translate-only ("trans") procs below store the same output as the SCALE
// procs, but are cheaper to compute. Also, since there is no scale, we don't
// need/have a FILTER version.
|
224 |
|
225 static void fill_sequential(uint16_t xptr[], int start, int count) { |
|
226 #if 1 |
|
227 if (reinterpret_cast<intptr_t>(xptr) & 0x2) { |
|
228 *xptr++ = start++; |
|
229 count -= 1; |
|
230 } |
|
231 if (count > 3) { |
|
232 uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr); |
|
233 uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1); |
|
234 uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3); |
|
235 start += count & ~3; |
|
236 int qcount = count >> 2; |
|
237 do { |
|
238 *xxptr++ = pattern0; |
|
239 pattern0 += 0x40004; |
|
240 *xxptr++ = pattern1; |
|
241 pattern1 += 0x40004; |
|
242 } while (--qcount != 0); |
|
243 xptr = reinterpret_cast<uint16_t*>(xxptr); |
|
244 count &= 3; |
|
245 } |
|
246 while (--count >= 0) { |
|
247 *xptr++ = start++; |
|
248 } |
|
249 #else |
|
250 for (int i = 0; i < count; i++) { |
|
251 *xptr++ = start++; |
|
252 } |
|
253 #endif |
|
254 } |
|
255 |
|
256 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy, |
|
257 int x, int y) { |
|
258 SkPoint pt; |
|
259 s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf, |
|
260 SkIntToScalar(y) + SK_ScalarHalf, &pt); |
|
261 **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16, |
|
262 s.fBitmap->height()); |
|
263 *xy += 1; // bump the ptr |
|
264 // return our starting X position |
|
265 return SkScalarToFixed(pt.fX) >> 16; |
|
266 } |
|
267 |
|
268 static void clampx_nofilter_trans(const SkBitmapProcState& s, |
|
269 uint32_t xy[], int count, int x, int y) { |
|
270 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); |
|
271 |
|
272 int xpos = nofilter_trans_preamble(s, &xy, x, y); |
|
273 const int width = s.fBitmap->width(); |
|
274 if (1 == width) { |
|
275 // all of the following X values must be 0 |
|
276 memset(xy, 0, count * sizeof(uint16_t)); |
|
277 return; |
|
278 } |
|
279 |
|
280 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); |
|
281 int n; |
|
282 |
|
283 // fill before 0 as needed |
|
284 if (xpos < 0) { |
|
285 n = -xpos; |
|
286 if (n > count) { |
|
287 n = count; |
|
288 } |
|
289 memset(xptr, 0, n * sizeof(uint16_t)); |
|
290 count -= n; |
|
291 if (0 == count) { |
|
292 return; |
|
293 } |
|
294 xptr += n; |
|
295 xpos = 0; |
|
296 } |
|
297 |
|
298 // fill in 0..width-1 if needed |
|
299 if (xpos < width) { |
|
300 n = width - xpos; |
|
301 if (n > count) { |
|
302 n = count; |
|
303 } |
|
304 fill_sequential(xptr, xpos, n); |
|
305 count -= n; |
|
306 if (0 == count) { |
|
307 return; |
|
308 } |
|
309 xptr += n; |
|
310 } |
|
311 |
|
312 // fill the remaining with the max value |
|
313 sk_memset16(xptr, width - 1, count); |
|
314 } |
|
315 |
|
316 static void repeatx_nofilter_trans(const SkBitmapProcState& s, |
|
317 uint32_t xy[], int count, int x, int y) { |
|
318 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); |
|
319 |
|
320 int xpos = nofilter_trans_preamble(s, &xy, x, y); |
|
321 const int width = s.fBitmap->width(); |
|
322 if (1 == width) { |
|
323 // all of the following X values must be 0 |
|
324 memset(xy, 0, count * sizeof(uint16_t)); |
|
325 return; |
|
326 } |
|
327 |
|
328 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); |
|
329 int start = sk_int_mod(xpos, width); |
|
330 int n = width - start; |
|
331 if (n > count) { |
|
332 n = count; |
|
333 } |
|
334 fill_sequential(xptr, start, n); |
|
335 xptr += n; |
|
336 count -= n; |
|
337 |
|
338 while (count >= width) { |
|
339 fill_sequential(xptr, 0, width); |
|
340 xptr += width; |
|
341 count -= width; |
|
342 } |
|
343 |
|
344 if (count > 0) { |
|
345 fill_sequential(xptr, 0, count); |
|
346 } |
|
347 } |
|
348 |
|
349 static void fill_backwards(uint16_t xptr[], int pos, int count) { |
|
350 for (int i = 0; i < count; i++) { |
|
351 SkASSERT(pos >= 0); |
|
352 xptr[i] = pos--; |
|
353 } |
|
354 } |
|
355 |
|
356 static void mirrorx_nofilter_trans(const SkBitmapProcState& s, |
|
357 uint32_t xy[], int count, int x, int y) { |
|
358 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); |
|
359 |
|
360 int xpos = nofilter_trans_preamble(s, &xy, x, y); |
|
361 const int width = s.fBitmap->width(); |
|
362 if (1 == width) { |
|
363 // all of the following X values must be 0 |
|
364 memset(xy, 0, count * sizeof(uint16_t)); |
|
365 return; |
|
366 } |
|
367 |
|
368 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); |
|
369 // need to know our start, and our initial phase (forward or backward) |
|
370 bool forward; |
|
371 int n; |
|
372 int start = sk_int_mod(xpos, 2 * width); |
|
373 if (start >= width) { |
|
374 start = width + ~(start - width); |
|
375 forward = false; |
|
376 n = start + 1; // [start .. 0] |
|
377 } else { |
|
378 forward = true; |
|
379 n = width - start; // [start .. width) |
|
380 } |
|
381 if (n > count) { |
|
382 n = count; |
|
383 } |
|
384 if (forward) { |
|
385 fill_sequential(xptr, start, n); |
|
386 } else { |
|
387 fill_backwards(xptr, start, n); |
|
388 } |
|
389 forward = !forward; |
|
390 xptr += n; |
|
391 count -= n; |
|
392 |
|
393 while (count >= width) { |
|
394 if (forward) { |
|
395 fill_sequential(xptr, 0, width); |
|
396 } else { |
|
397 fill_backwards(xptr, width - 1, width); |
|
398 } |
|
399 forward = !forward; |
|
400 xptr += width; |
|
401 count -= width; |
|
402 } |
|
403 |
|
404 if (count > 0) { |
|
405 if (forward) { |
|
406 fill_sequential(xptr, 0, count); |
|
407 } else { |
|
408 fill_backwards(xptr, width - 1, count); |
|
409 } |
|
410 } |
|
411 } |
|
412 |
|
413 /////////////////////////////////////////////////////////////////////////////// |
|
414 |
|
415 SkBitmapProcState::MatrixProc |
|
416 SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) { |
|
417 // test_int_tileprocs(); |
|
418 // check for our special case when there is no scale/affine/perspective |
|
419 if (trivial_matrix) { |
|
420 SkASSERT(SkPaint::kNone_FilterLevel == fFilterLevel); |
|
421 fIntTileProcY = choose_int_tile_proc(fTileModeY); |
|
422 switch (fTileModeX) { |
|
423 case SkShader::kClamp_TileMode: |
|
424 return clampx_nofilter_trans; |
|
425 case SkShader::kRepeat_TileMode: |
|
426 return repeatx_nofilter_trans; |
|
427 case SkShader::kMirror_TileMode: |
|
428 return mirrorx_nofilter_trans; |
|
429 } |
|
430 } |
|
431 |
|
432 int index = 0; |
|
433 if (fFilterLevel != SkPaint::kNone_FilterLevel) { |
|
434 index = 1; |
|
435 } |
|
436 if (fInvType & SkMatrix::kPerspective_Mask) { |
|
437 index += 4; |
|
438 } else if (fInvType & SkMatrix::kAffine_Mask) { |
|
439 index += 2; |
|
440 } |
|
441 |
|
442 if (SkShader::kClamp_TileMode == fTileModeX && |
|
443 SkShader::kClamp_TileMode == fTileModeY) |
|
444 { |
|
445 // clamp gets special version of filterOne |
|
446 fFilterOneX = SK_Fixed1; |
|
447 fFilterOneY = SK_Fixed1; |
|
448 return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index]; |
|
449 } |
|
450 |
|
451 // all remaining procs use this form for filterOne |
|
452 fFilterOneX = SK_Fixed1 / fBitmap->width(); |
|
453 fFilterOneY = SK_Fixed1 / fBitmap->height(); |
|
454 |
|
455 if (SkShader::kRepeat_TileMode == fTileModeX && |
|
456 SkShader::kRepeat_TileMode == fTileModeY) |
|
457 { |
|
458 return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index]; |
|
459 } |
|
460 |
|
461 fTileProcX = choose_tile_proc(fTileModeX); |
|
462 fTileProcY = choose_tile_proc(fTileModeY); |
|
463 fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX); |
|
464 fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY); |
|
465 return GeneralXY_Procs[index]; |
|
466 } |