1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/mfbt/FloatingPoint.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,409 @@ 1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ 1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 + 1.10 +/* Various predicates and operations on IEEE-754 floating point types. */ 1.11 + 1.12 +#ifndef mozilla_FloatingPoint_h 1.13 +#define mozilla_FloatingPoint_h 1.14 + 1.15 +#include "mozilla/Assertions.h" 1.16 +#include "mozilla/Attributes.h" 1.17 +#include "mozilla/Casting.h" 1.18 +#include "mozilla/MathAlgorithms.h" 1.19 +#include "mozilla/Types.h" 1.20 + 1.21 +#include <stdint.h> 1.22 + 1.23 +namespace mozilla { 1.24 + 1.25 +/* 1.26 + * It's reasonable to ask why we have this header at all. Don't isnan, 1.27 + * copysign, the built-in comparison operators, and the like solve these 1.28 + * problems? Unfortunately, they don't. We've found that various compilers 1.29 + * (MSVC, MSVC when compiling with PGO, and GCC on OS X, at least) miscompile 1.30 + * the standard methods in various situations, so we can't use them. Some of 1.31 + * these compilers even have problems compiling seemingly reasonable bitwise 1.32 + * algorithms! But with some care we've found algorithms that seem to not 1.33 + * trigger those compiler bugs. 1.34 + * 1.35 + * For the aforementioned reasons, be very wary of making changes to any of 1.36 + * these algorithms. If you must make changes, keep a careful eye out for 1.37 + * compiler bustage, particularly PGO-specific bustage. 1.38 + */ 1.39 + 1.40 +struct FloatTypeTraits 1.41 +{ 1.42 + typedef uint32_t Bits; 1.43 + 1.44 + static const unsigned ExponentBias = 127; 1.45 + static const unsigned ExponentShift = 23; 1.46 + 1.47 + static const Bits SignBit = 0x80000000UL; 1.48 + static const Bits ExponentBits = 0x7F800000UL; 1.49 + static const Bits SignificandBits = 0x007FFFFFUL; 1.50 +}; 1.51 + 1.52 +struct DoubleTypeTraits 1.53 +{ 1.54 + typedef uint64_t Bits; 1.55 + 1.56 + static const unsigned ExponentBias = 1023; 1.57 + static const unsigned ExponentShift = 52; 1.58 + 1.59 + static const Bits SignBit = 0x8000000000000000ULL; 1.60 + static const Bits ExponentBits = 0x7ff0000000000000ULL; 1.61 + static const Bits SignificandBits = 0x000fffffffffffffULL; 1.62 +}; 1.63 + 1.64 +template<typename T> struct SelectTrait; 1.65 +template<> struct SelectTrait<float> : public FloatTypeTraits {}; 1.66 +template<> struct SelectTrait<double> : public DoubleTypeTraits {}; 1.67 + 1.68 +/* 1.69 + * This struct contains details regarding the encoding of floating-point 1.70 + * numbers that can be useful for direct bit manipulation. As of now, the 1.71 + * template parameter has to be float or double. 1.72 + * 1.73 + * The nested typedef |Bits| is the unsigned integral type with the same size 1.74 + * as T: uint32_t for float and uint64_t for double (static assertions 1.75 + * double-check these assumptions). 1.76 + * 1.77 + * ExponentBias is the offset that is subtracted from the exponent when 1.78 + * computing the value, i.e. one plus the opposite of the mininum possible 1.79 + * exponent. 1.80 + * ExponentShift is the shift that one needs to apply to retrieve the exponent 1.81 + * component of the value. 1.82 + * 1.83 + * SignBit contains a bits mask. Bit-and-ing with this mask will result in 1.84 + * obtaining the sign bit. 1.85 + * ExponentBits contains the mask needed for obtaining the exponent bits and 1.86 + * SignificandBits contains the mask needed for obtaining the significand bits. 1.87 + * 1.88 + * Full details of how floating point number formats are encoded are beyond the 1.89 + * scope of this comment. For more information, see 1.90 + * http://en.wikipedia.org/wiki/IEEE_floating_point 1.91 + * http://en.wikipedia.org/wiki/Floating_point#IEEE_754:_floating_point_in_modern_computers 1.92 + */ 1.93 +template<typename T> 1.94 +struct FloatingPoint : public SelectTrait<T> 1.95 +{ 1.96 + typedef SelectTrait<T> Base; 1.97 + typedef typename Base::Bits Bits; 1.98 + 1.99 + static_assert((Base::SignBit & Base::ExponentBits) == 0, 1.100 + "sign bit shouldn't overlap exponent bits"); 1.101 + static_assert((Base::SignBit & Base::SignificandBits) == 0, 1.102 + "sign bit shouldn't overlap significand bits"); 1.103 + static_assert((Base::ExponentBits & Base::SignificandBits) == 0, 1.104 + "exponent bits shouldn't overlap significand bits"); 1.105 + 1.106 + static_assert((Base::SignBit | Base::ExponentBits | Base::SignificandBits) == 1.107 + ~Bits(0), 1.108 + "all bits accounted for"); 1.109 + 1.110 + /* 1.111 + * These implementations assume float/double are 32/64-bit single/double format 1.112 + * number types compatible with the IEEE-754 standard. C++ don't require this 1.113 + * to be the case. But we required this in implementations of these algorithms 1.114 + * that preceded this header, so we shouldn't break anything if we keep doing so. 1.115 + */ 1.116 + static_assert(sizeof(T) == sizeof(Bits), "Bits must be same size as T"); 1.117 +}; 1.118 + 1.119 +/** Determines whether a double is NaN. */ 1.120 +template<typename T> 1.121 +static MOZ_ALWAYS_INLINE bool 1.122 +IsNaN(T t) 1.123 +{ 1.124 + /* 1.125 + * A float/double is NaN if all exponent bits are 1 and the significand contains at 1.126 + * least one non-zero bit. 1.127 + */ 1.128 + typedef FloatingPoint<T> Traits; 1.129 + typedef typename Traits::Bits Bits; 1.130 + Bits bits = BitwiseCast<Bits>(t); 1.131 + return (bits & Traits::ExponentBits) == Traits::ExponentBits && 1.132 + (bits & Traits::SignificandBits) != 0; 1.133 +} 1.134 + 1.135 +/** Determines whether a float/double is +Infinity or -Infinity. */ 1.136 +template<typename T> 1.137 +static MOZ_ALWAYS_INLINE bool 1.138 +IsInfinite(T t) 1.139 +{ 1.140 + /* Infinities have all exponent bits set to 1 and an all-0 significand. */ 1.141 + typedef FloatingPoint<T> Traits; 1.142 + typedef typename Traits::Bits Bits; 1.143 + Bits bits = BitwiseCast<Bits>(t); 1.144 + return (bits & ~Traits::SignBit) == Traits::ExponentBits; 1.145 +} 1.146 + 1.147 +/** Determines whether a float/double is not NaN or infinite. */ 1.148 +template<typename T> 1.149 +static MOZ_ALWAYS_INLINE bool 1.150 +IsFinite(T t) 1.151 +{ 1.152 + /* 1.153 + * NaN and Infinities are the only non-finite floats/doubles, and both have all 1.154 + * exponent bits set to 1. 1.155 + */ 1.156 + typedef FloatingPoint<T> Traits; 1.157 + typedef typename Traits::Bits Bits; 1.158 + Bits bits = BitwiseCast<Bits>(t); 1.159 + return (bits & Traits::ExponentBits) != Traits::ExponentBits; 1.160 +} 1.161 + 1.162 +/** 1.163 + * Determines whether a float/double is negative. It is an error to call this method 1.164 + * on a float/double which is NaN. 1.165 + */ 1.166 +template<typename T> 1.167 +static MOZ_ALWAYS_INLINE bool 1.168 +IsNegative(T t) 1.169 +{ 1.170 + MOZ_ASSERT(!IsNaN(t), "NaN does not have a sign"); 1.171 + 1.172 + /* The sign bit is set if the double is negative. */ 1.173 + typedef FloatingPoint<T> Traits; 1.174 + typedef typename Traits::Bits Bits; 1.175 + Bits bits = BitwiseCast<Bits>(t); 1.176 + return (bits & Traits::SignBit) != 0; 1.177 +} 1.178 + 1.179 +/** Determines whether a float/double represents -0. */ 1.180 +template<typename T> 1.181 +static MOZ_ALWAYS_INLINE bool 1.182 +IsNegativeZero(T t) 1.183 +{ 1.184 + /* Only the sign bit is set if the value is -0. */ 1.185 + typedef FloatingPoint<T> Traits; 1.186 + typedef typename Traits::Bits Bits; 1.187 + Bits bits = BitwiseCast<Bits>(t); 1.188 + return bits == Traits::SignBit; 1.189 +} 1.190 + 1.191 +/** 1.192 + * Returns the exponent portion of the float/double. 1.193 + * 1.194 + * Zero is not special-cased, so ExponentComponent(0.0) is 1.195 + * -int_fast16_t(Traits::ExponentBias). 1.196 + */ 1.197 +template<typename T> 1.198 +static MOZ_ALWAYS_INLINE int_fast16_t 1.199 +ExponentComponent(T t) 1.200 +{ 1.201 + /* 1.202 + * The exponent component of a float/double is an unsigned number, biased from its 1.203 + * actual value. Subtract the bias to retrieve the actual exponent. 1.204 + */ 1.205 + typedef FloatingPoint<T> Traits; 1.206 + typedef typename Traits::Bits Bits; 1.207 + Bits bits = BitwiseCast<Bits>(t); 1.208 + return int_fast16_t((bits & Traits::ExponentBits) >> Traits::ExponentShift) - 1.209 + int_fast16_t(Traits::ExponentBias); 1.210 +} 1.211 + 1.212 +/** Returns +Infinity. */ 1.213 +template<typename T> 1.214 +static MOZ_ALWAYS_INLINE T 1.215 +PositiveInfinity() 1.216 +{ 1.217 + /* 1.218 + * Positive infinity has all exponent bits set, sign bit set to 0, and no 1.219 + * significand. 1.220 + */ 1.221 + typedef FloatingPoint<T> Traits; 1.222 + return BitwiseCast<T>(Traits::ExponentBits); 1.223 +} 1.224 + 1.225 +/** Returns -Infinity. */ 1.226 +template<typename T> 1.227 +static MOZ_ALWAYS_INLINE T 1.228 +NegativeInfinity() 1.229 +{ 1.230 + /* 1.231 + * Negative infinity has all exponent bits set, sign bit set to 1, and no 1.232 + * significand. 1.233 + */ 1.234 + typedef FloatingPoint<T> Traits; 1.235 + return BitwiseCast<T>(Traits::SignBit | Traits::ExponentBits); 1.236 +} 1.237 + 1.238 + 1.239 +/** Constructs a NaN value with the specified sign bit and significand bits. */ 1.240 +template<typename T> 1.241 +static MOZ_ALWAYS_INLINE T 1.242 +SpecificNaN(int signbit, typename FloatingPoint<T>::Bits significand) 1.243 +{ 1.244 + typedef FloatingPoint<T> Traits; 1.245 + MOZ_ASSERT(signbit == 0 || signbit == 1); 1.246 + MOZ_ASSERT((significand & ~Traits::SignificandBits) == 0); 1.247 + MOZ_ASSERT(significand & Traits::SignificandBits); 1.248 + 1.249 + T t = BitwiseCast<T>((signbit ? Traits::SignBit : 0) | 1.250 + Traits::ExponentBits | 1.251 + significand); 1.252 + MOZ_ASSERT(IsNaN(t)); 1.253 + return t; 1.254 +} 1.255 + 1.256 +/** Computes the smallest non-zero positive float/double value. */ 1.257 +template<typename T> 1.258 +static MOZ_ALWAYS_INLINE T 1.259 +MinNumberValue() 1.260 +{ 1.261 + typedef FloatingPoint<T> Traits; 1.262 + typedef typename Traits::Bits Bits; 1.263 + return BitwiseCast<T>(Bits(1)); 1.264 +} 1.265 + 1.266 +/** 1.267 + * If t is equal to some int32_t value, set *i to that value and return true; 1.268 + * otherwise return false. 1.269 + * 1.270 + * Note that negative zero is "equal" to zero here. To test whether a value can 1.271 + * be losslessly converted to int32_t and back, use NumberIsInt32 instead. 1.272 + */ 1.273 +template<typename T> 1.274 +static MOZ_ALWAYS_INLINE bool 1.275 +NumberEqualsInt32(T t, int32_t* i) 1.276 +{ 1.277 + /* 1.278 + * XXX Casting a floating-point value that doesn't truncate to int32_t, to 1.279 + * int32_t, induces undefined behavior. We should definitely fix this 1.280 + * (bug 744965), but as apparently it "works" in practice, it's not a 1.281 + * pressing concern now. 1.282 + */ 1.283 + return t == (*i = int32_t(t)); 1.284 +} 1.285 + 1.286 +/** 1.287 + * If d can be converted to int32_t and back to an identical double value, 1.288 + * set *i to that value and return true; otherwise return false. 1.289 + * 1.290 + * The difference between this and NumberEqualsInt32 is that this method returns 1.291 + * false for negative zero. 1.292 + */ 1.293 +template<typename T> 1.294 +static MOZ_ALWAYS_INLINE bool 1.295 +NumberIsInt32(T t, int32_t* i) 1.296 +{ 1.297 + return !IsNegativeZero(t) && NumberEqualsInt32(t, i); 1.298 +} 1.299 + 1.300 +/** 1.301 + * Computes a NaN value. Do not use this method if you depend upon a particular 1.302 + * NaN value being returned. 1.303 + */ 1.304 +template<typename T> 1.305 +static MOZ_ALWAYS_INLINE T 1.306 +UnspecifiedNaN() 1.307 +{ 1.308 + /* 1.309 + * If we can use any quiet NaN, we might as well use the all-ones NaN, 1.310 + * since it's cheap to materialize on common platforms (such as x64, where 1.311 + * this value can be represented in a 32-bit signed immediate field, allowing 1.312 + * it to be stored to memory in a single instruction). 1.313 + */ 1.314 + typedef FloatingPoint<T> Traits; 1.315 + return SpecificNaN<T>(1, Traits::SignificandBits); 1.316 +} 1.317 + 1.318 +/** 1.319 + * Compare two doubles for equality, *without* equating -0 to +0, and equating 1.320 + * any NaN value to any other NaN value. (The normal equality operators equate 1.321 + * -0 with +0, and they equate NaN to no other value.) 1.322 + */ 1.323 +template<typename T> 1.324 +static inline bool 1.325 +NumbersAreIdentical(T t1, T t2) 1.326 +{ 1.327 + typedef FloatingPoint<T> Traits; 1.328 + typedef typename Traits::Bits Bits; 1.329 + if (IsNaN(t1)) 1.330 + return IsNaN(t2); 1.331 + return BitwiseCast<Bits>(t1) == BitwiseCast<Bits>(t2); 1.332 +} 1.333 + 1.334 +namespace detail { 1.335 + 1.336 +template<typename T> 1.337 +struct FuzzyEqualsEpsilon; 1.338 + 1.339 +template<> 1.340 +struct FuzzyEqualsEpsilon<float> 1.341 +{ 1.342 + // A number near 1e-5 that is exactly representable in 1.343 + // floating point 1.344 + static const float value() { return 1.0f / (1 << 17); } 1.345 +}; 1.346 + 1.347 +template<> 1.348 +struct FuzzyEqualsEpsilon<double> 1.349 +{ 1.350 + // A number near 1e-12 that is exactly representable in 1.351 + // a double 1.352 + static const double value() { return 1.0 / (1LL << 40); } 1.353 +}; 1.354 + 1.355 +} // namespace detail 1.356 + 1.357 +/** 1.358 + * Compare two floating point values for equality, modulo rounding error. That 1.359 + * is, the two values are considered equal if they are both not NaN and if they 1.360 + * are less than or equal to epsilon apart. The default value of epsilon is near 1.361 + * 1e-5. 1.362 + * 1.363 + * For most scenarios you will want to use FuzzyEqualsMultiplicative instead, 1.364 + * as it is more reasonable over the entire range of floating point numbers. 1.365 + * This additive version should only be used if you know the range of the numbers 1.366 + * you are dealing with is bounded and stays around the same order of magnitude. 1.367 + */ 1.368 +template<typename T> 1.369 +static MOZ_ALWAYS_INLINE bool 1.370 +FuzzyEqualsAdditive(T val1, T val2, T epsilon = detail::FuzzyEqualsEpsilon<T>::value()) 1.371 +{ 1.372 + static_assert(IsFloatingPoint<T>::value, "floating point type required"); 1.373 + return Abs(val1 - val2) <= epsilon; 1.374 +} 1.375 + 1.376 +/** 1.377 + * Compare two floating point values for equality, allowing for rounding error 1.378 + * relative to the magnitude of the values. That is, the two values are 1.379 + * considered equal if they are both not NaN and they are less than or equal to 1.380 + * some epsilon apart, where the epsilon is scaled by the smaller of the two 1.381 + * argument values. 1.382 + * 1.383 + * In most cases you will want to use this rather than FuzzyEqualsAdditive, as 1.384 + * this function effectively masks out differences in the bottom few bits of 1.385 + * the floating point numbers being compared, regardless of what order of magnitude 1.386 + * those numbers are at. 1.387 + */ 1.388 +template<typename T> 1.389 +static MOZ_ALWAYS_INLINE bool 1.390 +FuzzyEqualsMultiplicative(T val1, T val2, T epsilon = detail::FuzzyEqualsEpsilon<T>::value()) 1.391 +{ 1.392 + static_assert(IsFloatingPoint<T>::value, "floating point type required"); 1.393 + // can't use std::min because of bug 965340 1.394 + T smaller = Abs(val1) < Abs(val2) ? Abs(val1) : Abs(val2); 1.395 + return Abs(val1 - val2) <= epsilon * smaller; 1.396 +} 1.397 + 1.398 +/** 1.399 + * Returns true if the given value can be losslessly represented as an IEEE-754 1.400 + * single format number, false otherwise. All NaN values are considered 1.401 + * representable (notwithstanding that the exact bit pattern of a double format 1.402 + * NaN value can't be exactly represented in single format). 1.403 + * 1.404 + * This function isn't inlined to avoid buggy optimizations by MSVC. 1.405 + */ 1.406 +MOZ_WARN_UNUSED_RESULT 1.407 +extern MFBT_API bool 1.408 +IsFloat32Representable(double x); 1.409 + 1.410 +} /* namespace mozilla */ 1.411 + 1.412 +#endif /* mozilla_FloatingPoint_h */