|
1 |
|
2 /* |
|
3 * Copyright 2006 The Android Open Source Project |
|
4 * |
|
5 * Use of this source code is governed by a BSD-style license that can be |
|
6 * found in the LICENSE file. |
|
7 */ |
|
8 |
|
9 |
|
10 #ifndef SkMath_DEFINED |
|
11 #define SkMath_DEFINED |
|
12 |
|
13 #include "SkTypes.h" |
|
14 |
|
/**
 *  Returns (numer1 * numer2) / denom, carrying the intermediate product in
 *  full 64-bit precision.
 *
 *  Passing denom == 0 is an error. A result that does not fit in 32 bits
 *  receives no special handling.
 */
int32_t SkMulDiv(int32_t numer1, int32_t numer2, int32_t denom);
|
21 |
|
/**
 *  Returns (numer << shift) / denom, carrying the shifted numerator in full
 *  64-bit precision.
 *
 *  Passing denom == 0 is an error. A result that does not fit in 32 bits
 *  receives no special handling.
 */
int32_t SkDivBits(int32_t numer, int32_t denom, int shift);
|
28 |
|
/**
 *  Returns the integer square root of value, computed with a bias of
 *  bitBias.
 */
int32_t SkSqrtBits(int32_t value, int bitBias);
|
33 |
|
/**
 *  Returns the integer square root of n, where n is treated as an SkFixed
 *  (16.16) value. Forwards to SkSqrtBits with a bitBias of 15.
 */
#define SkSqrt32(n)         SkSqrtBits(n, 15)
|
37 |
|
38 // 64bit -> 32bit utilities |
|
39 |
|
/**
 *  Reports whether a 64-bit value survives a round trip through int32_t,
 *  i.e. whether it can be represented exactly as a signed 32-bit integer.
 */
static inline bool sk_64_isS32(int64_t value) {
    // Narrow first, then compare against the untouched 64-bit input.
    const int32_t narrowed = (int32_t)value;
    return value == narrowed;
}
|
46 |
|
47 /** |
|
48 * Return the 64bit argument as signed 32bits, asserting in debug that the arg |
|
49 * exactly fits in signed 32bits. In the release build, no checks are preformed |
|
50 * and the return value if the arg does not fit is undefined. |
|
51 */ |
|
52 static inline int32_t sk_64_asS32(int64_t value) { |
|
53 SkASSERT(sk_64_isS32(value)); |
|
54 return (int32_t)value; |
|
55 } |
|
56 |
|
// Multiplies two values as 64-bit integers. Because the parameters are
// int64_t, plain int arguments are widened at the call site, so callers do
// not need to write the cast themselves, e.g. (int64_t)a * b.
static inline int64_t sk_64_mul(int64_t a, int64_t b) {
    const int64_t product = a * b;
    return product;
}
|
63 |
|
64 /////////////////////////////////////////////////////////////////////////////// |
|
65 |
|
//! Returns the number of leading zero bits (0...32)
int SkCLZ_portable(uint32_t);

/**
 *  SkCLZ(mask) returns the number of leading zero bits in a 32-bit value,
 *  in the range 0...32 (32 when mask is 0).
 *
 *  If SkCLZ is already defined as a macro, nothing below takes effect.
 *  Otherwise a compiler intrinsic is used where one is available, and
 *  SkCLZ_portable is used as the fallback.
 */
#ifndef SkCLZ
    #if defined(_MSC_VER) && _MSC_VER >= 1400
        #include <intrin.h>

        static inline int SkCLZ(uint32_t mask) {
            if (mask) {
                // _BitScanReverse is declared in <intrin.h> with an
                // 'unsigned long*' out-parameter. Using that type directly
                // avoids relying on DWORD, which <intrin.h> does not define.
                unsigned long index = 0;
                _BitScanReverse(&index, mask);
                // index is the position (0...31) of the highest set bit, so
                // index ^ 0x1F equals 31 - index, the leading-zero count.
                return (int)(index ^ 0x1F);
            } else {
                return 32;
            }
        }
    #elif defined(SK_CPU_ARM) || defined(__GNUC__) || defined(__clang__)
        static inline int SkCLZ(uint32_t mask) {
            // __builtin_clz(0) is undefined, so we have to detect that case.
            return mask ? __builtin_clz(mask) : 32;
        }
    #else
        #define SkCLZ(x)    SkCLZ_portable(x)
    #endif
#endif
|
91 |
|
/**
 *  Clamps negative values to zero: returns (value < 0 ? 0 : value), computed
 *  with shifts and masks only (no compares or branches).
 */
static inline int SkClampPos(int value) {
    // Shifting right by 31 smears the sign bit: all ones for a negative
    // value, zero otherwise. Clearing those bits zeroes negative inputs.
    const int signMask = value >> 31;
    return value & ~signMask;
}
|
98 |
|
99 /** Given an integer and a positive (max) integer, return the value |
|
100 * pinned against 0 and max, inclusive. |
|
101 * @param value The value we want returned pinned between [0...max] |
|
102 * @param max The positive max value |
|
103 * @return 0 if value < 0, max if value > max, else value |
|
104 */ |
|
105 static inline int SkClampMax(int value, int max) { |
|
106 // ensure that max is positive |
|
107 SkASSERT(max >= 0); |
|
108 if (value < 0) { |
|
109 value = 0; |
|
110 } |
|
111 if (value > max) { |
|
112 value = max; |
|
113 } |
|
114 return value; |
|
115 } |
|
116 |
|
117 /** |
|
118 * Returns the smallest power-of-2 that is >= the specified value. If value |
|
119 * is already a power of 2, then it is returned unchanged. It is undefined |
|
120 * if value is <= 0. |
|
121 */ |
|
122 static inline int SkNextPow2(int value) { |
|
123 SkASSERT(value > 0); |
|
124 return 1 << (32 - SkCLZ(value - 1)); |
|
125 } |
|
126 |
|
127 /** |
|
128 * Returns the log2 of the specified value, were that value to be rounded up |
|
129 * to the next power of 2. It is undefined to pass 0. Examples: |
|
130 * SkNextLog2(1) -> 0 |
|
131 * SkNextLog2(2) -> 1 |
|
132 * SkNextLog2(3) -> 2 |
|
133 * SkNextLog2(4) -> 2 |
|
134 * SkNextLog2(5) -> 3 |
|
135 */ |
|
136 static inline int SkNextLog2(uint32_t value) { |
|
137 SkASSERT(value != 0); |
|
138 return 32 - SkCLZ(value - 1); |
|
139 } |
|
140 |
|
/**
 *  Returns true if value is a power of 2. There is no explicit check for
 *  value <= 0; in particular 0 passes the bit test and yields true.
 */
static inline bool SkIsPow2(int value) {
    // Subtracting 1 flips the lowest set bit and everything below it, so the
    // AND is zero exactly when at most one bit is set.
    const int withoutLowestBit = value & (value - 1);
    return 0 == withoutLowestBit;
}
|
148 |
|
149 /////////////////////////////////////////////////////////////////////////////// |
|
150 |
|
/**
 *  SkMulS16(x, y) returns x * y, under the requirement that both operands
 *  fit in int16_t. That restriction allows a faster instruction to be used
 *  on some architectures.
 *
 *  Three variants are selected at compile time:
 *    - SK_ARM_HAS_EDSP: inline function built on the smulbb instruction
 *    - SK_DEBUG:        inline function that asserts the int16_t requirement
 *    - otherwise:       a plain multiply macro
 */
#if defined(SK_ARM_HAS_EDSP)
    static inline int32_t SkMulS16(S16CPU x, S16CPU y) {
        SkASSERT((int16_t)x == x);
        SkASSERT((int16_t)y == y);
        int32_t result;
        asm("smulbb %0, %1, %2 \n"
            : "=r"(result)
            : "r"(x), "r"(y)
            );
        return result;
    }
#elif defined(SK_DEBUG)
    static inline int32_t SkMulS16(S16CPU x, S16CPU y) {
        SkASSERT((int16_t)x == x);
        SkASSERT((int16_t)y == y);
        return x * y;
    }
#else
    #define SkMulS16(x, y)  ((x) * (y))
#endif
|
178 |
|
179 /** |
|
180 * Return a*b/((1 << shift) - 1), rounding any fractional bits. |
|
181 * Only valid if a and b are unsigned and <= 32767 and shift is > 0 and <= 8 |
|
182 */ |
|
183 static inline unsigned SkMul16ShiftRound(U16CPU a, U16CPU b, int shift) { |
|
184 SkASSERT(a <= 32767); |
|
185 SkASSERT(b <= 32767); |
|
186 SkASSERT(shift > 0 && shift <= 8); |
|
187 unsigned prod = SkMulS16(a, b) + (1 << (shift - 1)); |
|
188 return (prod + (prod >> shift)) >> shift; |
|
189 } |
|
190 |
|
191 /** |
|
192 * Return a*b/255, rounding any fractional bits. |
|
193 * Only valid if a and b are unsigned and <= 32767. |
|
194 */ |
|
195 static inline U8CPU SkMulDiv255Round(U16CPU a, U16CPU b) { |
|
196 SkASSERT(a <= 32767); |
|
197 SkASSERT(b <= 32767); |
|
198 unsigned prod = SkMulS16(a, b) + 128; |
|
199 return (prod + (prod >> 8)) >> 8; |
|
200 } |
|
201 |
|
/**
 *  Computes numer/denom and numer%denom in one call, storing the quotient in
 *  *div and the remainder in *mod. Both results are converted to Out with
 *  static_cast.
 */
template <typename In, typename Out>
inline void SkTDivMod(In numer, In denom, Out* div, Out* mod) {
#if defined(SK_CPU_ARM)
    // Written the straightforward way (see the other branch), GCC does not
    // fuse the two operations into a single divmod call; it emits a div call
    // followed by a divmod call. Deriving the remainder from the quotient is
    // as fast as calling __aeabi_[u]idivmod by hand, with prettier code.
    //
    // This form benched at around 2x faster than the other branch.
    const In quotient = numer/denom;
    *div = static_cast<Out>(quotient);
    *mod = static_cast<Out>(numer-quotient*denom);
#else
    // On x86 the pair below compiles down to a single idiv.
    *div = static_cast<Out>(numer/denom);
    *mod = static_cast<Out>(numer%denom);
#endif  // SK_CPU_ARM
}
|
223 |
|
224 #endif |