michael@0: /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* vim: set ts=8 sts=2 et sw=2 tw=80: */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: /* Various predicates and operations on IEEE-754 floating point types. */ michael@0: michael@0: #ifndef mozilla_FloatingPoint_h michael@0: #define mozilla_FloatingPoint_h michael@0: michael@0: #include "mozilla/Assertions.h" michael@0: #include "mozilla/Attributes.h" michael@0: #include "mozilla/Casting.h" michael@0: #include "mozilla/MathAlgorithms.h" michael@0: #include "mozilla/Types.h" michael@0: michael@0: #include michael@0: michael@0: namespace mozilla { michael@0: michael@0: /* michael@0: * It's reasonable to ask why we have this header at all. Don't isnan, michael@0: * copysign, the built-in comparison operators, and the like solve these michael@0: * problems? Unfortunately, they don't. We've found that various compilers michael@0: * (MSVC, MSVC when compiling with PGO, and GCC on OS X, at least) miscompile michael@0: * the standard methods in various situations, so we can't use them. Some of michael@0: * these compilers even have problems compiling seemingly reasonable bitwise michael@0: * algorithms! But with some care we've found algorithms that seem to not michael@0: * trigger those compiler bugs. michael@0: * michael@0: * For the aforementioned reasons, be very wary of making changes to any of michael@0: * these algorithms. If you must make changes, keep a careful eye out for michael@0: * compiler bustage, particularly PGO-specific bustage. michael@0: */ michael@0: michael@0: struct FloatTypeTraits michael@0: { michael@0: typedef uint32_t Bits; michael@0: michael@0: static const unsigned ExponentBias = 127; michael@0: static const unsigned ExponentShift = 23; michael@0: michael@0: static const Bits SignBit = 0x80000000UL; michael@0: static const Bits ExponentBits = 0x7F800000UL; michael@0: static const Bits SignificandBits = 0x007FFFFFUL; michael@0: }; michael@0: michael@0: struct DoubleTypeTraits michael@0: { michael@0: typedef uint64_t Bits; michael@0: michael@0: static const unsigned ExponentBias = 1023; michael@0: static const unsigned ExponentShift = 52; michael@0: michael@0: static const Bits SignBit = 0x8000000000000000ULL; michael@0: static const Bits ExponentBits = 0x7ff0000000000000ULL; michael@0: static const Bits SignificandBits = 0x000fffffffffffffULL; michael@0: }; michael@0: michael@0: template struct SelectTrait; michael@0: template<> struct SelectTrait : public FloatTypeTraits {}; michael@0: template<> struct SelectTrait : public DoubleTypeTraits {}; michael@0: michael@0: /* michael@0: * This struct contains details regarding the encoding of floating-point michael@0: * numbers that can be useful for direct bit manipulation. As of now, the michael@0: * template parameter has to be float or double. michael@0: * michael@0: * The nested typedef |Bits| is the unsigned integral type with the same size michael@0: * as T: uint32_t for float and uint64_t for double (static assertions michael@0: * double-check these assumptions). michael@0: * michael@0: * ExponentBias is the offset that is subtracted from the exponent when michael@0: * computing the value, i.e. one plus the opposite of the mininum possible michael@0: * exponent. michael@0: * ExponentShift is the shift that one needs to apply to retrieve the exponent michael@0: * component of the value. michael@0: * michael@0: * SignBit contains a bits mask. Bit-and-ing with this mask will result in michael@0: * obtaining the sign bit. michael@0: * ExponentBits contains the mask needed for obtaining the exponent bits and michael@0: * SignificandBits contains the mask needed for obtaining the significand bits. michael@0: * michael@0: * Full details of how floating point number formats are encoded are beyond the michael@0: * scope of this comment. For more information, see michael@0: * http://en.wikipedia.org/wiki/IEEE_floating_point michael@0: * http://en.wikipedia.org/wiki/Floating_point#IEEE_754:_floating_point_in_modern_computers michael@0: */ michael@0: template michael@0: struct FloatingPoint : public SelectTrait michael@0: { michael@0: typedef SelectTrait Base; michael@0: typedef typename Base::Bits Bits; michael@0: michael@0: static_assert((Base::SignBit & Base::ExponentBits) == 0, michael@0: "sign bit shouldn't overlap exponent bits"); michael@0: static_assert((Base::SignBit & Base::SignificandBits) == 0, michael@0: "sign bit shouldn't overlap significand bits"); michael@0: static_assert((Base::ExponentBits & Base::SignificandBits) == 0, michael@0: "exponent bits shouldn't overlap significand bits"); michael@0: michael@0: static_assert((Base::SignBit | Base::ExponentBits | Base::SignificandBits) == michael@0: ~Bits(0), michael@0: "all bits accounted for"); michael@0: michael@0: /* michael@0: * These implementations assume float/double are 32/64-bit single/double format michael@0: * number types compatible with the IEEE-754 standard. C++ don't require this michael@0: * to be the case. But we required this in implementations of these algorithms michael@0: * that preceded this header, so we shouldn't break anything if we keep doing so. michael@0: */ michael@0: static_assert(sizeof(T) == sizeof(Bits), "Bits must be same size as T"); michael@0: }; michael@0: michael@0: /** Determines whether a double is NaN. */ michael@0: template michael@0: static MOZ_ALWAYS_INLINE bool michael@0: IsNaN(T t) michael@0: { michael@0: /* michael@0: * A float/double is NaN if all exponent bits are 1 and the significand contains at michael@0: * least one non-zero bit. michael@0: */ michael@0: typedef FloatingPoint Traits; michael@0: typedef typename Traits::Bits Bits; michael@0: Bits bits = BitwiseCast(t); michael@0: return (bits & Traits::ExponentBits) == Traits::ExponentBits && michael@0: (bits & Traits::SignificandBits) != 0; michael@0: } michael@0: michael@0: /** Determines whether a float/double is +Infinity or -Infinity. */ michael@0: template michael@0: static MOZ_ALWAYS_INLINE bool michael@0: IsInfinite(T t) michael@0: { michael@0: /* Infinities have all exponent bits set to 1 and an all-0 significand. */ michael@0: typedef FloatingPoint Traits; michael@0: typedef typename Traits::Bits Bits; michael@0: Bits bits = BitwiseCast(t); michael@0: return (bits & ~Traits::SignBit) == Traits::ExponentBits; michael@0: } michael@0: michael@0: /** Determines whether a float/double is not NaN or infinite. */ michael@0: template michael@0: static MOZ_ALWAYS_INLINE bool michael@0: IsFinite(T t) michael@0: { michael@0: /* michael@0: * NaN and Infinities are the only non-finite floats/doubles, and both have all michael@0: * exponent bits set to 1. michael@0: */ michael@0: typedef FloatingPoint Traits; michael@0: typedef typename Traits::Bits Bits; michael@0: Bits bits = BitwiseCast(t); michael@0: return (bits & Traits::ExponentBits) != Traits::ExponentBits; michael@0: } michael@0: michael@0: /** michael@0: * Determines whether a float/double is negative. It is an error to call this method michael@0: * on a float/double which is NaN. michael@0: */ michael@0: template michael@0: static MOZ_ALWAYS_INLINE bool michael@0: IsNegative(T t) michael@0: { michael@0: MOZ_ASSERT(!IsNaN(t), "NaN does not have a sign"); michael@0: michael@0: /* The sign bit is set if the double is negative. */ michael@0: typedef FloatingPoint Traits; michael@0: typedef typename Traits::Bits Bits; michael@0: Bits bits = BitwiseCast(t); michael@0: return (bits & Traits::SignBit) != 0; michael@0: } michael@0: michael@0: /** Determines whether a float/double represents -0. */ michael@0: template michael@0: static MOZ_ALWAYS_INLINE bool michael@0: IsNegativeZero(T t) michael@0: { michael@0: /* Only the sign bit is set if the value is -0. */ michael@0: typedef FloatingPoint Traits; michael@0: typedef typename Traits::Bits Bits; michael@0: Bits bits = BitwiseCast(t); michael@0: return bits == Traits::SignBit; michael@0: } michael@0: michael@0: /** michael@0: * Returns the exponent portion of the float/double. michael@0: * michael@0: * Zero is not special-cased, so ExponentComponent(0.0) is michael@0: * -int_fast16_t(Traits::ExponentBias). michael@0: */ michael@0: template michael@0: static MOZ_ALWAYS_INLINE int_fast16_t michael@0: ExponentComponent(T t) michael@0: { michael@0: /* michael@0: * The exponent component of a float/double is an unsigned number, biased from its michael@0: * actual value. Subtract the bias to retrieve the actual exponent. michael@0: */ michael@0: typedef FloatingPoint Traits; michael@0: typedef typename Traits::Bits Bits; michael@0: Bits bits = BitwiseCast(t); michael@0: return int_fast16_t((bits & Traits::ExponentBits) >> Traits::ExponentShift) - michael@0: int_fast16_t(Traits::ExponentBias); michael@0: } michael@0: michael@0: /** Returns +Infinity. */ michael@0: template michael@0: static MOZ_ALWAYS_INLINE T michael@0: PositiveInfinity() michael@0: { michael@0: /* michael@0: * Positive infinity has all exponent bits set, sign bit set to 0, and no michael@0: * significand. michael@0: */ michael@0: typedef FloatingPoint Traits; michael@0: return BitwiseCast(Traits::ExponentBits); michael@0: } michael@0: michael@0: /** Returns -Infinity. */ michael@0: template michael@0: static MOZ_ALWAYS_INLINE T michael@0: NegativeInfinity() michael@0: { michael@0: /* michael@0: * Negative infinity has all exponent bits set, sign bit set to 1, and no michael@0: * significand. michael@0: */ michael@0: typedef FloatingPoint Traits; michael@0: return BitwiseCast(Traits::SignBit | Traits::ExponentBits); michael@0: } michael@0: michael@0: michael@0: /** Constructs a NaN value with the specified sign bit and significand bits. */ michael@0: template michael@0: static MOZ_ALWAYS_INLINE T michael@0: SpecificNaN(int signbit, typename FloatingPoint::Bits significand) michael@0: { michael@0: typedef FloatingPoint Traits; michael@0: MOZ_ASSERT(signbit == 0 || signbit == 1); michael@0: MOZ_ASSERT((significand & ~Traits::SignificandBits) == 0); michael@0: MOZ_ASSERT(significand & Traits::SignificandBits); michael@0: michael@0: T t = BitwiseCast((signbit ? Traits::SignBit : 0) | michael@0: Traits::ExponentBits | michael@0: significand); michael@0: MOZ_ASSERT(IsNaN(t)); michael@0: return t; michael@0: } michael@0: michael@0: /** Computes the smallest non-zero positive float/double value. */ michael@0: template michael@0: static MOZ_ALWAYS_INLINE T michael@0: MinNumberValue() michael@0: { michael@0: typedef FloatingPoint Traits; michael@0: typedef typename Traits::Bits Bits; michael@0: return BitwiseCast(Bits(1)); michael@0: } michael@0: michael@0: /** michael@0: * If t is equal to some int32_t value, set *i to that value and return true; michael@0: * otherwise return false. michael@0: * michael@0: * Note that negative zero is "equal" to zero here. To test whether a value can michael@0: * be losslessly converted to int32_t and back, use NumberIsInt32 instead. michael@0: */ michael@0: template michael@0: static MOZ_ALWAYS_INLINE bool michael@0: NumberEqualsInt32(T t, int32_t* i) michael@0: { michael@0: /* michael@0: * XXX Casting a floating-point value that doesn't truncate to int32_t, to michael@0: * int32_t, induces undefined behavior. We should definitely fix this michael@0: * (bug 744965), but as apparently it "works" in practice, it's not a michael@0: * pressing concern now. michael@0: */ michael@0: return t == (*i = int32_t(t)); michael@0: } michael@0: michael@0: /** michael@0: * If d can be converted to int32_t and back to an identical double value, michael@0: * set *i to that value and return true; otherwise return false. michael@0: * michael@0: * The difference between this and NumberEqualsInt32 is that this method returns michael@0: * false for negative zero. michael@0: */ michael@0: template michael@0: static MOZ_ALWAYS_INLINE bool michael@0: NumberIsInt32(T t, int32_t* i) michael@0: { michael@0: return !IsNegativeZero(t) && NumberEqualsInt32(t, i); michael@0: } michael@0: michael@0: /** michael@0: * Computes a NaN value. Do not use this method if you depend upon a particular michael@0: * NaN value being returned. michael@0: */ michael@0: template michael@0: static MOZ_ALWAYS_INLINE T michael@0: UnspecifiedNaN() michael@0: { michael@0: /* michael@0: * If we can use any quiet NaN, we might as well use the all-ones NaN, michael@0: * since it's cheap to materialize on common platforms (such as x64, where michael@0: * this value can be represented in a 32-bit signed immediate field, allowing michael@0: * it to be stored to memory in a single instruction). michael@0: */ michael@0: typedef FloatingPoint Traits; michael@0: return SpecificNaN(1, Traits::SignificandBits); michael@0: } michael@0: michael@0: /** michael@0: * Compare two doubles for equality, *without* equating -0 to +0, and equating michael@0: * any NaN value to any other NaN value. (The normal equality operators equate michael@0: * -0 with +0, and they equate NaN to no other value.) michael@0: */ michael@0: template michael@0: static inline bool michael@0: NumbersAreIdentical(T t1, T t2) michael@0: { michael@0: typedef FloatingPoint Traits; michael@0: typedef typename Traits::Bits Bits; michael@0: if (IsNaN(t1)) michael@0: return IsNaN(t2); michael@0: return BitwiseCast(t1) == BitwiseCast(t2); michael@0: } michael@0: michael@0: namespace detail { michael@0: michael@0: template michael@0: struct FuzzyEqualsEpsilon; michael@0: michael@0: template<> michael@0: struct FuzzyEqualsEpsilon michael@0: { michael@0: // A number near 1e-5 that is exactly representable in michael@0: // floating point michael@0: static const float value() { return 1.0f / (1 << 17); } michael@0: }; michael@0: michael@0: template<> michael@0: struct FuzzyEqualsEpsilon michael@0: { michael@0: // A number near 1e-12 that is exactly representable in michael@0: // a double michael@0: static const double value() { return 1.0 / (1LL << 40); } michael@0: }; michael@0: michael@0: } // namespace detail michael@0: michael@0: /** michael@0: * Compare two floating point values for equality, modulo rounding error. That michael@0: * is, the two values are considered equal if they are both not NaN and if they michael@0: * are less than or equal to epsilon apart. The default value of epsilon is near michael@0: * 1e-5. michael@0: * michael@0: * For most scenarios you will want to use FuzzyEqualsMultiplicative instead, michael@0: * as it is more reasonable over the entire range of floating point numbers. michael@0: * This additive version should only be used if you know the range of the numbers michael@0: * you are dealing with is bounded and stays around the same order of magnitude. michael@0: */ michael@0: template michael@0: static MOZ_ALWAYS_INLINE bool michael@0: FuzzyEqualsAdditive(T val1, T val2, T epsilon = detail::FuzzyEqualsEpsilon::value()) michael@0: { michael@0: static_assert(IsFloatingPoint::value, "floating point type required"); michael@0: return Abs(val1 - val2) <= epsilon; michael@0: } michael@0: michael@0: /** michael@0: * Compare two floating point values for equality, allowing for rounding error michael@0: * relative to the magnitude of the values. That is, the two values are michael@0: * considered equal if they are both not NaN and they are less than or equal to michael@0: * some epsilon apart, where the epsilon is scaled by the smaller of the two michael@0: * argument values. michael@0: * michael@0: * In most cases you will want to use this rather than FuzzyEqualsAdditive, as michael@0: * this function effectively masks out differences in the bottom few bits of michael@0: * the floating point numbers being compared, regardless of what order of magnitude michael@0: * those numbers are at. michael@0: */ michael@0: template michael@0: static MOZ_ALWAYS_INLINE bool michael@0: FuzzyEqualsMultiplicative(T val1, T val2, T epsilon = detail::FuzzyEqualsEpsilon::value()) michael@0: { michael@0: static_assert(IsFloatingPoint::value, "floating point type required"); michael@0: // can't use std::min because of bug 965340 michael@0: T smaller = Abs(val1) < Abs(val2) ? Abs(val1) : Abs(val2); michael@0: return Abs(val1 - val2) <= epsilon * smaller; michael@0: } michael@0: michael@0: /** michael@0: * Returns true if the given value can be losslessly represented as an IEEE-754 michael@0: * single format number, false otherwise. All NaN values are considered michael@0: * representable (notwithstanding that the exact bit pattern of a double format michael@0: * NaN value can't be exactly represented in single format). michael@0: * michael@0: * This function isn't inlined to avoid buggy optimizations by MSVC. michael@0: */ michael@0: MOZ_WARN_UNUSED_RESULT michael@0: extern MFBT_API bool michael@0: IsFloat32Representable(double x); michael@0: michael@0: } /* namespace mozilla */ michael@0: michael@0: #endif /* mozilla_FloatingPoint_h */