|
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
|
3 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 /* Various predicates and operations on IEEE-754 floating point types. */ |
|
8 |
|
9 #ifndef mozilla_FloatingPoint_h |
|
10 #define mozilla_FloatingPoint_h |
|
11 |
|
12 #include "mozilla/Assertions.h" |
|
13 #include "mozilla/Attributes.h" |
|
14 #include "mozilla/Casting.h" |
|
15 #include "mozilla/MathAlgorithms.h" |
|
16 #include "mozilla/Types.h" |
|
17 |
|
18 #include <stdint.h> |
|
19 |
|
20 namespace mozilla { |
|
21 |
|
22 /* |
|
23 * It's reasonable to ask why we have this header at all. Don't isnan, |
|
24 * copysign, the built-in comparison operators, and the like solve these |
|
25 * problems? Unfortunately, they don't. We've found that various compilers |
|
26 * (MSVC, MSVC when compiling with PGO, and GCC on OS X, at least) miscompile |
|
27 * the standard methods in various situations, so we can't use them. Some of |
|
28 * these compilers even have problems compiling seemingly reasonable bitwise |
|
29 * algorithms! But with some care we've found algorithms that seem to not |
|
30 * trigger those compiler bugs. |
|
31 * |
|
32 * For the aforementioned reasons, be very wary of making changes to any of |
|
33 * these algorithms. If you must make changes, keep a careful eye out for |
|
34 * compiler bustage, particularly PGO-specific bustage. |
|
35 */ |
|
36 |
|
/*
 * Bit-layout constants for |float|, which this header treats as the 32-bit
 * IEEE-754 single format.
 */
struct FloatTypeTraits
{
  /* Unsigned integer type used to hold the raw bits of a float. */
  typedef uint32_t Bits;

  /* Mask selecting the sign bit. */
  static const Bits SignBit = 0x80000000UL;
  /* Mask selecting the exponent field. */
  static const Bits ExponentBits = 0x7F800000UL;
  /* Mask selecting the significand field. */
  static const Bits SignificandBits = 0x007FFFFFUL;

  /* Right-shift that moves the exponent field down to bit 0. */
  static const unsigned ExponentShift = 23;
  /* Bias subtracted from the stored exponent to get the actual exponent. */
  static const unsigned ExponentBias = 127;
};
|
48 |
|
/*
 * Bit-layout constants for |double|, which this header treats as the 64-bit
 * IEEE-754 double format.
 */
struct DoubleTypeTraits
{
  /* Unsigned integer type used to hold the raw bits of a double. */
  typedef uint64_t Bits;

  /* Mask selecting the sign bit. */
  static const Bits SignBit = 0x8000000000000000ULL;
  /* Mask selecting the exponent field. */
  static const Bits ExponentBits = 0x7ff0000000000000ULL;
  /* Mask selecting the significand field. */
  static const Bits SignificandBits = 0x000fffffffffffffULL;

  /* Right-shift that moves the exponent field down to bit 0. */
  static const unsigned ExponentShift = 52;
  /* Bias subtracted from the stored exponent to get the actual exponent. */
  static const unsigned ExponentBias = 1023;
};
|
60 |
|
61 template<typename T> struct SelectTrait; |
|
62 template<> struct SelectTrait<float> : public FloatTypeTraits {}; |
|
63 template<> struct SelectTrait<double> : public DoubleTypeTraits {}; |
|
64 |
|
65 /* |
|
66 * This struct contains details regarding the encoding of floating-point |
|
67 * numbers that can be useful for direct bit manipulation. As of now, the |
|
68 * template parameter has to be float or double. |
|
69 * |
|
70 * The nested typedef |Bits| is the unsigned integral type with the same size |
|
71 * as T: uint32_t for float and uint64_t for double (static assertions |
|
72 * double-check these assumptions). |
|
73 * |
|
74 * ExponentBias is the offset that is subtracted from the exponent when |
|
75 * computing the value, i.e. one plus the opposite of the mininum possible |
|
76 * exponent. |
|
77 * ExponentShift is the shift that one needs to apply to retrieve the exponent |
|
78 * component of the value. |
|
79 * |
|
80 * SignBit contains a bits mask. Bit-and-ing with this mask will result in |
|
81 * obtaining the sign bit. |
|
82 * ExponentBits contains the mask needed for obtaining the exponent bits and |
|
83 * SignificandBits contains the mask needed for obtaining the significand bits. |
|
84 * |
|
85 * Full details of how floating point number formats are encoded are beyond the |
|
86 * scope of this comment. For more information, see |
|
87 * http://en.wikipedia.org/wiki/IEEE_floating_point |
|
88 * http://en.wikipedia.org/wiki/Floating_point#IEEE_754:_floating_point_in_modern_computers |
|
89 */ |
|
90 template<typename T> |
|
91 struct FloatingPoint : public SelectTrait<T> |
|
92 { |
|
93 typedef SelectTrait<T> Base; |
|
94 typedef typename Base::Bits Bits; |
|
95 |
|
96 static_assert((Base::SignBit & Base::ExponentBits) == 0, |
|
97 "sign bit shouldn't overlap exponent bits"); |
|
98 static_assert((Base::SignBit & Base::SignificandBits) == 0, |
|
99 "sign bit shouldn't overlap significand bits"); |
|
100 static_assert((Base::ExponentBits & Base::SignificandBits) == 0, |
|
101 "exponent bits shouldn't overlap significand bits"); |
|
102 |
|
103 static_assert((Base::SignBit | Base::ExponentBits | Base::SignificandBits) == |
|
104 ~Bits(0), |
|
105 "all bits accounted for"); |
|
106 |
|
107 /* |
|
108 * These implementations assume float/double are 32/64-bit single/double format |
|
109 * number types compatible with the IEEE-754 standard. C++ don't require this |
|
110 * to be the case. But we required this in implementations of these algorithms |
|
111 * that preceded this header, so we shouldn't break anything if we keep doing so. |
|
112 */ |
|
113 static_assert(sizeof(T) == sizeof(Bits), "Bits must be same size as T"); |
|
114 }; |
|
115 |
|
116 /** Determines whether a double is NaN. */ |
|
117 template<typename T> |
|
118 static MOZ_ALWAYS_INLINE bool |
|
119 IsNaN(T t) |
|
120 { |
|
121 /* |
|
122 * A float/double is NaN if all exponent bits are 1 and the significand contains at |
|
123 * least one non-zero bit. |
|
124 */ |
|
125 typedef FloatingPoint<T> Traits; |
|
126 typedef typename Traits::Bits Bits; |
|
127 Bits bits = BitwiseCast<Bits>(t); |
|
128 return (bits & Traits::ExponentBits) == Traits::ExponentBits && |
|
129 (bits & Traits::SignificandBits) != 0; |
|
130 } |
|
131 |
|
132 /** Determines whether a float/double is +Infinity or -Infinity. */ |
|
133 template<typename T> |
|
134 static MOZ_ALWAYS_INLINE bool |
|
135 IsInfinite(T t) |
|
136 { |
|
137 /* Infinities have all exponent bits set to 1 and an all-0 significand. */ |
|
138 typedef FloatingPoint<T> Traits; |
|
139 typedef typename Traits::Bits Bits; |
|
140 Bits bits = BitwiseCast<Bits>(t); |
|
141 return (bits & ~Traits::SignBit) == Traits::ExponentBits; |
|
142 } |
|
143 |
|
144 /** Determines whether a float/double is not NaN or infinite. */ |
|
145 template<typename T> |
|
146 static MOZ_ALWAYS_INLINE bool |
|
147 IsFinite(T t) |
|
148 { |
|
149 /* |
|
150 * NaN and Infinities are the only non-finite floats/doubles, and both have all |
|
151 * exponent bits set to 1. |
|
152 */ |
|
153 typedef FloatingPoint<T> Traits; |
|
154 typedef typename Traits::Bits Bits; |
|
155 Bits bits = BitwiseCast<Bits>(t); |
|
156 return (bits & Traits::ExponentBits) != Traits::ExponentBits; |
|
157 } |
|
158 |
|
159 /** |
|
160 * Determines whether a float/double is negative. It is an error to call this method |
|
161 * on a float/double which is NaN. |
|
162 */ |
|
163 template<typename T> |
|
164 static MOZ_ALWAYS_INLINE bool |
|
165 IsNegative(T t) |
|
166 { |
|
167 MOZ_ASSERT(!IsNaN(t), "NaN does not have a sign"); |
|
168 |
|
169 /* The sign bit is set if the double is negative. */ |
|
170 typedef FloatingPoint<T> Traits; |
|
171 typedef typename Traits::Bits Bits; |
|
172 Bits bits = BitwiseCast<Bits>(t); |
|
173 return (bits & Traits::SignBit) != 0; |
|
174 } |
|
175 |
|
176 /** Determines whether a float/double represents -0. */ |
|
177 template<typename T> |
|
178 static MOZ_ALWAYS_INLINE bool |
|
179 IsNegativeZero(T t) |
|
180 { |
|
181 /* Only the sign bit is set if the value is -0. */ |
|
182 typedef FloatingPoint<T> Traits; |
|
183 typedef typename Traits::Bits Bits; |
|
184 Bits bits = BitwiseCast<Bits>(t); |
|
185 return bits == Traits::SignBit; |
|
186 } |
|
187 |
|
188 /** |
|
189 * Returns the exponent portion of the float/double. |
|
190 * |
|
191 * Zero is not special-cased, so ExponentComponent(0.0) is |
|
192 * -int_fast16_t(Traits::ExponentBias). |
|
193 */ |
|
194 template<typename T> |
|
195 static MOZ_ALWAYS_INLINE int_fast16_t |
|
196 ExponentComponent(T t) |
|
197 { |
|
198 /* |
|
199 * The exponent component of a float/double is an unsigned number, biased from its |
|
200 * actual value. Subtract the bias to retrieve the actual exponent. |
|
201 */ |
|
202 typedef FloatingPoint<T> Traits; |
|
203 typedef typename Traits::Bits Bits; |
|
204 Bits bits = BitwiseCast<Bits>(t); |
|
205 return int_fast16_t((bits & Traits::ExponentBits) >> Traits::ExponentShift) - |
|
206 int_fast16_t(Traits::ExponentBias); |
|
207 } |
|
208 |
|
209 /** Returns +Infinity. */ |
|
210 template<typename T> |
|
211 static MOZ_ALWAYS_INLINE T |
|
212 PositiveInfinity() |
|
213 { |
|
214 /* |
|
215 * Positive infinity has all exponent bits set, sign bit set to 0, and no |
|
216 * significand. |
|
217 */ |
|
218 typedef FloatingPoint<T> Traits; |
|
219 return BitwiseCast<T>(Traits::ExponentBits); |
|
220 } |
|
221 |
|
222 /** Returns -Infinity. */ |
|
223 template<typename T> |
|
224 static MOZ_ALWAYS_INLINE T |
|
225 NegativeInfinity() |
|
226 { |
|
227 /* |
|
228 * Negative infinity has all exponent bits set, sign bit set to 1, and no |
|
229 * significand. |
|
230 */ |
|
231 typedef FloatingPoint<T> Traits; |
|
232 return BitwiseCast<T>(Traits::SignBit | Traits::ExponentBits); |
|
233 } |
|
234 |
|
235 |
|
236 /** Constructs a NaN value with the specified sign bit and significand bits. */ |
|
237 template<typename T> |
|
238 static MOZ_ALWAYS_INLINE T |
|
239 SpecificNaN(int signbit, typename FloatingPoint<T>::Bits significand) |
|
240 { |
|
241 typedef FloatingPoint<T> Traits; |
|
242 MOZ_ASSERT(signbit == 0 || signbit == 1); |
|
243 MOZ_ASSERT((significand & ~Traits::SignificandBits) == 0); |
|
244 MOZ_ASSERT(significand & Traits::SignificandBits); |
|
245 |
|
246 T t = BitwiseCast<T>((signbit ? Traits::SignBit : 0) | |
|
247 Traits::ExponentBits | |
|
248 significand); |
|
249 MOZ_ASSERT(IsNaN(t)); |
|
250 return t; |
|
251 } |
|
252 |
|
253 /** Computes the smallest non-zero positive float/double value. */ |
|
254 template<typename T> |
|
255 static MOZ_ALWAYS_INLINE T |
|
256 MinNumberValue() |
|
257 { |
|
258 typedef FloatingPoint<T> Traits; |
|
259 typedef typename Traits::Bits Bits; |
|
260 return BitwiseCast<T>(Bits(1)); |
|
261 } |
|
262 |
|
263 /** |
|
264 * If t is equal to some int32_t value, set *i to that value and return true; |
|
265 * otherwise return false. |
|
266 * |
|
267 * Note that negative zero is "equal" to zero here. To test whether a value can |
|
268 * be losslessly converted to int32_t and back, use NumberIsInt32 instead. |
|
269 */ |
|
270 template<typename T> |
|
271 static MOZ_ALWAYS_INLINE bool |
|
272 NumberEqualsInt32(T t, int32_t* i) |
|
273 { |
|
274 /* |
|
275 * XXX Casting a floating-point value that doesn't truncate to int32_t, to |
|
276 * int32_t, induces undefined behavior. We should definitely fix this |
|
277 * (bug 744965), but as apparently it "works" in practice, it's not a |
|
278 * pressing concern now. |
|
279 */ |
|
280 return t == (*i = int32_t(t)); |
|
281 } |
|
282 |
|
283 /** |
|
284 * If d can be converted to int32_t and back to an identical double value, |
|
285 * set *i to that value and return true; otherwise return false. |
|
286 * |
|
287 * The difference between this and NumberEqualsInt32 is that this method returns |
|
288 * false for negative zero. |
|
289 */ |
|
290 template<typename T> |
|
291 static MOZ_ALWAYS_INLINE bool |
|
292 NumberIsInt32(T t, int32_t* i) |
|
293 { |
|
294 return !IsNegativeZero(t) && NumberEqualsInt32(t, i); |
|
295 } |
|
296 |
|
297 /** |
|
298 * Computes a NaN value. Do not use this method if you depend upon a particular |
|
299 * NaN value being returned. |
|
300 */ |
|
301 template<typename T> |
|
302 static MOZ_ALWAYS_INLINE T |
|
303 UnspecifiedNaN() |
|
304 { |
|
305 /* |
|
306 * If we can use any quiet NaN, we might as well use the all-ones NaN, |
|
307 * since it's cheap to materialize on common platforms (such as x64, where |
|
308 * this value can be represented in a 32-bit signed immediate field, allowing |
|
309 * it to be stored to memory in a single instruction). |
|
310 */ |
|
311 typedef FloatingPoint<T> Traits; |
|
312 return SpecificNaN<T>(1, Traits::SignificandBits); |
|
313 } |
|
314 |
|
315 /** |
|
316 * Compare two doubles for equality, *without* equating -0 to +0, and equating |
|
317 * any NaN value to any other NaN value. (The normal equality operators equate |
|
318 * -0 with +0, and they equate NaN to no other value.) |
|
319 */ |
|
320 template<typename T> |
|
321 static inline bool |
|
322 NumbersAreIdentical(T t1, T t2) |
|
323 { |
|
324 typedef FloatingPoint<T> Traits; |
|
325 typedef typename Traits::Bits Bits; |
|
326 if (IsNaN(t1)) |
|
327 return IsNaN(t2); |
|
328 return BitwiseCast<Bits>(t1) == BitwiseCast<Bits>(t2); |
|
329 } |
|
330 |
|
namespace detail {

/*
 * Supplies, through a static value() function, the default epsilon for the
 * FuzzyEquals* comparisons.  Only float and double are specialized.
 *
 * value() returns a plain (non-const) scalar: a top-level const on a by-value
 * return type has no effect and only provokes compiler warnings.
 */
template<typename T>
struct FuzzyEqualsEpsilon;

template<>
struct FuzzyEqualsEpsilon<float>
{
  // 2^-17: a number near 1e-5 that is exactly representable in
  // floating point
  static float value() { return 1.0f / (1 << 17); }
};

template<>
struct FuzzyEqualsEpsilon<double>
{
  // 2^-40: a number near 1e-12 that is exactly representable in
  // a double
  static double value() { return 1.0 / (1LL << 40); }
};

} // namespace detail
|
353 |
|
354 /** |
|
355 * Compare two floating point values for equality, modulo rounding error. That |
|
356 * is, the two values are considered equal if they are both not NaN and if they |
|
357 * are less than or equal to epsilon apart. The default value of epsilon is near |
|
358 * 1e-5. |
|
359 * |
|
360 * For most scenarios you will want to use FuzzyEqualsMultiplicative instead, |
|
361 * as it is more reasonable over the entire range of floating point numbers. |
|
362 * This additive version should only be used if you know the range of the numbers |
|
363 * you are dealing with is bounded and stays around the same order of magnitude. |
|
364 */ |
|
365 template<typename T> |
|
366 static MOZ_ALWAYS_INLINE bool |
|
367 FuzzyEqualsAdditive(T val1, T val2, T epsilon = detail::FuzzyEqualsEpsilon<T>::value()) |
|
368 { |
|
369 static_assert(IsFloatingPoint<T>::value, "floating point type required"); |
|
370 return Abs(val1 - val2) <= epsilon; |
|
371 } |
|
372 |
|
373 /** |
|
374 * Compare two floating point values for equality, allowing for rounding error |
|
375 * relative to the magnitude of the values. That is, the two values are |
|
376 * considered equal if they are both not NaN and they are less than or equal to |
|
377 * some epsilon apart, where the epsilon is scaled by the smaller of the two |
|
378 * argument values. |
|
379 * |
|
380 * In most cases you will want to use this rather than FuzzyEqualsAdditive, as |
|
381 * this function effectively masks out differences in the bottom few bits of |
|
382 * the floating point numbers being compared, regardless of what order of magnitude |
|
383 * those numbers are at. |
|
384 */ |
|
385 template<typename T> |
|
386 static MOZ_ALWAYS_INLINE bool |
|
387 FuzzyEqualsMultiplicative(T val1, T val2, T epsilon = detail::FuzzyEqualsEpsilon<T>::value()) |
|
388 { |
|
389 static_assert(IsFloatingPoint<T>::value, "floating point type required"); |
|
390 // can't use std::min because of bug 965340 |
|
391 T smaller = Abs(val1) < Abs(val2) ? Abs(val1) : Abs(val2); |
|
392 return Abs(val1 - val2) <= epsilon * smaller; |
|
393 } |
|
394 |
|
395 /** |
|
396 * Returns true if the given value can be losslessly represented as an IEEE-754 |
|
397 * single format number, false otherwise. All NaN values are considered |
|
398 * representable (notwithstanding that the exact bit pattern of a double format |
|
399 * NaN value can't be exactly represented in single format). |
|
400 * |
|
401 * This function isn't inlined to avoid buggy optimizations by MSVC. |
|
402 */ |
|
403 MOZ_WARN_UNUSED_RESULT |
|
404 extern MFBT_API bool |
|
405 IsFloat32Representable(double x); |
|
406 |
|
407 } /* namespace mozilla */ |
|
408 |
|
409 #endif /* mozilla_FloatingPoint_h */ |