mfbt/FloatingPoint.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/mfbt/FloatingPoint.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,409 @@
     1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* vim: set ts=8 sts=2 et sw=2 tw=80: */
     1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +/* Various predicates and operations on IEEE-754 floating point types. */
    1.11 +
    1.12 +#ifndef mozilla_FloatingPoint_h
    1.13 +#define mozilla_FloatingPoint_h
    1.14 +
    1.15 +#include "mozilla/Assertions.h"
    1.16 +#include "mozilla/Attributes.h"
    1.17 +#include "mozilla/Casting.h"
    1.18 +#include "mozilla/MathAlgorithms.h"
    1.19 +#include "mozilla/Types.h"
    1.20 +
    1.21 +#include <stdint.h>
    1.22 +
    1.23 +namespace mozilla {
    1.24 +
    1.25 +/*
    1.26 + * It's reasonable to ask why we have this header at all.  Don't isnan,
    1.27 + * copysign, the built-in comparison operators, and the like solve these
    1.28 + * problems?  Unfortunately, they don't.  We've found that various compilers
    1.29 + * (MSVC, MSVC when compiling with PGO, and GCC on OS X, at least) miscompile
    1.30 + * the standard methods in various situations, so we can't use them.  Some of
    1.31 + * these compilers even have problems compiling seemingly reasonable bitwise
    1.32 + * algorithms!  But with some care we've found algorithms that seem to not
    1.33 + * trigger those compiler bugs.
    1.34 + *
    1.35 + * For the aforementioned reasons, be very wary of making changes to any of
    1.36 + * these algorithms.  If you must make changes, keep a careful eye out for
    1.37 + * compiler bustage, particularly PGO-specific bustage.
    1.38 + */
    1.39 +
    1.40 +struct FloatTypeTraits
    1.41 +{
    1.42 +    typedef uint32_t Bits;
    1.43 +
    1.44 +    static const unsigned ExponentBias = 127;
    1.45 +    static const unsigned ExponentShift = 23;
    1.46 +
    1.47 +    static const Bits SignBit         = 0x80000000UL;
    1.48 +    static const Bits ExponentBits    = 0x7F800000UL;
    1.49 +    static const Bits SignificandBits = 0x007FFFFFUL;
    1.50 +};
    1.51 +
    1.52 +struct DoubleTypeTraits
    1.53 +{
    1.54 +    typedef uint64_t Bits;
    1.55 +
    1.56 +    static const unsigned ExponentBias = 1023;
    1.57 +    static const unsigned ExponentShift = 52;
    1.58 +
    1.59 +    static const Bits SignBit         = 0x8000000000000000ULL;
    1.60 +    static const Bits ExponentBits    = 0x7ff0000000000000ULL;
    1.61 +    static const Bits SignificandBits = 0x000fffffffffffffULL;
    1.62 +};
    1.63 +
    1.64 +template<typename T> struct SelectTrait;
    1.65 +template<> struct SelectTrait<float> : public FloatTypeTraits {};
    1.66 +template<> struct SelectTrait<double> : public DoubleTypeTraits {};
    1.67 +
    1.68 +/*
    1.69 + *  This struct contains details regarding the encoding of floating-point
    1.70 + *  numbers that can be useful for direct bit manipulation. As of now, the
    1.71 + *  template parameter has to be float or double.
    1.72 + *
    1.73 + *  The nested typedef |Bits| is the unsigned integral type with the same size
    1.74 + *  as T: uint32_t for float and uint64_t for double (static assertions
    1.75 + *  double-check these assumptions).
    1.76 + *
    1.77 + *  ExponentBias is the offset that is subtracted from the exponent when
    1.78 + *  computing the value, i.e. one plus the opposite of the mininum possible
    1.79 + *  exponent.
    1.80 + *  ExponentShift is the shift that one needs to apply to retrieve the exponent
    1.81 + *  component of the value.
    1.82 + *
    1.83 + *  SignBit contains a bits mask. Bit-and-ing with this mask will result in
    1.84 + *  obtaining the sign bit.
    1.85 + *  ExponentBits contains the mask needed for obtaining the exponent bits and
    1.86 + *  SignificandBits contains the mask needed for obtaining the significand bits.
    1.87 + *
    1.88 + *  Full details of how floating point number formats are encoded are beyond the
    1.89 + *  scope of this comment. For more information, see
    1.90 + *  http://en.wikipedia.org/wiki/IEEE_floating_point
    1.91 + *  http://en.wikipedia.org/wiki/Floating_point#IEEE_754:_floating_point_in_modern_computers
    1.92 + */
    1.93 +template<typename T>
    1.94 +struct FloatingPoint : public SelectTrait<T>
    1.95 +{
    1.96 +    typedef SelectTrait<T> Base;
    1.97 +    typedef typename Base::Bits Bits;
    1.98 +
    1.99 +    static_assert((Base::SignBit & Base::ExponentBits) == 0,
   1.100 +                  "sign bit shouldn't overlap exponent bits");
   1.101 +    static_assert((Base::SignBit & Base::SignificandBits) == 0,
   1.102 +                  "sign bit shouldn't overlap significand bits");
   1.103 +    static_assert((Base::ExponentBits & Base::SignificandBits) == 0,
   1.104 +                  "exponent bits shouldn't overlap significand bits");
   1.105 +
   1.106 +    static_assert((Base::SignBit | Base::ExponentBits | Base::SignificandBits) ==
   1.107 +                  ~Bits(0),
   1.108 +                  "all bits accounted for");
   1.109 +
   1.110 +    /*
   1.111 +     * These implementations assume float/double are 32/64-bit single/double format
   1.112 +     * number types compatible with the IEEE-754 standard.  C++ don't require this
   1.113 +     * to be the case.  But we required this in implementations of these algorithms
   1.114 +     * that preceded this header, so we shouldn't break anything if we keep doing so.
   1.115 +     */
   1.116 +    static_assert(sizeof(T) == sizeof(Bits), "Bits must be same size as T");
   1.117 +};
   1.118 +
   1.119 +/** Determines whether a double is NaN. */
   1.120 +template<typename T>
   1.121 +static MOZ_ALWAYS_INLINE bool
   1.122 +IsNaN(T t)
   1.123 +{
   1.124 +  /*
   1.125 +   * A float/double is NaN if all exponent bits are 1 and the significand contains at
   1.126 +   * least one non-zero bit.
   1.127 +   */
   1.128 +  typedef FloatingPoint<T> Traits;
   1.129 +  typedef typename Traits::Bits Bits;
   1.130 +  Bits bits = BitwiseCast<Bits>(t);
   1.131 +  return (bits & Traits::ExponentBits) == Traits::ExponentBits &&
   1.132 +         (bits & Traits::SignificandBits) != 0;
   1.133 +}
   1.134 +
   1.135 +/** Determines whether a float/double is +Infinity or -Infinity. */
   1.136 +template<typename T>
   1.137 +static MOZ_ALWAYS_INLINE bool
   1.138 +IsInfinite(T t)
   1.139 +{
   1.140 +  /* Infinities have all exponent bits set to 1 and an all-0 significand. */
   1.141 +  typedef FloatingPoint<T> Traits;
   1.142 +  typedef typename Traits::Bits Bits;
   1.143 +  Bits bits = BitwiseCast<Bits>(t);
   1.144 +  return (bits & ~Traits::SignBit) == Traits::ExponentBits;
   1.145 +}
   1.146 +
   1.147 +/** Determines whether a float/double is not NaN or infinite. */
   1.148 +template<typename T>
   1.149 +static MOZ_ALWAYS_INLINE bool
   1.150 +IsFinite(T t)
   1.151 +{
   1.152 +  /*
   1.153 +   * NaN and Infinities are the only non-finite floats/doubles, and both have all
   1.154 +   * exponent bits set to 1.
   1.155 +   */
   1.156 +  typedef FloatingPoint<T> Traits;
   1.157 +  typedef typename Traits::Bits Bits;
   1.158 +  Bits bits = BitwiseCast<Bits>(t);
   1.159 +  return (bits & Traits::ExponentBits) != Traits::ExponentBits;
   1.160 +}
   1.161 +
   1.162 +/**
   1.163 + * Determines whether a float/double is negative.  It is an error to call this method
   1.164 + * on a float/double which is NaN.
   1.165 + */
   1.166 +template<typename T>
   1.167 +static MOZ_ALWAYS_INLINE bool
   1.168 +IsNegative(T t)
   1.169 +{
   1.170 +  MOZ_ASSERT(!IsNaN(t), "NaN does not have a sign");
   1.171 +
   1.172 +  /* The sign bit is set if the double is negative. */
   1.173 +  typedef FloatingPoint<T> Traits;
   1.174 +  typedef typename Traits::Bits Bits;
   1.175 +  Bits bits = BitwiseCast<Bits>(t);
   1.176 +  return (bits & Traits::SignBit) != 0;
   1.177 +}
   1.178 +
   1.179 +/** Determines whether a float/double represents -0. */
   1.180 +template<typename T>
   1.181 +static MOZ_ALWAYS_INLINE bool
   1.182 +IsNegativeZero(T t)
   1.183 +{
   1.184 +  /* Only the sign bit is set if the value is -0. */
   1.185 +  typedef FloatingPoint<T> Traits;
   1.186 +  typedef typename Traits::Bits Bits;
   1.187 +  Bits bits = BitwiseCast<Bits>(t);
   1.188 +  return bits == Traits::SignBit;
   1.189 +}
   1.190 +
   1.191 +/**
   1.192 + * Returns the exponent portion of the float/double.
   1.193 + *
   1.194 + * Zero is not special-cased, so ExponentComponent(0.0) is
   1.195 + * -int_fast16_t(Traits::ExponentBias).
   1.196 + */
   1.197 +template<typename T>
   1.198 +static MOZ_ALWAYS_INLINE int_fast16_t
   1.199 +ExponentComponent(T t)
   1.200 +{
   1.201 +  /*
   1.202 +   * The exponent component of a float/double is an unsigned number, biased from its
   1.203 +   * actual value.  Subtract the bias to retrieve the actual exponent.
   1.204 +   */
   1.205 +  typedef FloatingPoint<T> Traits;
   1.206 +  typedef typename Traits::Bits Bits;
   1.207 +  Bits bits = BitwiseCast<Bits>(t);
   1.208 +  return int_fast16_t((bits & Traits::ExponentBits) >> Traits::ExponentShift) -
   1.209 +         int_fast16_t(Traits::ExponentBias);
   1.210 +}
   1.211 +
   1.212 +/** Returns +Infinity. */
   1.213 +template<typename T>
   1.214 +static MOZ_ALWAYS_INLINE T
   1.215 +PositiveInfinity()
   1.216 +{
   1.217 +  /*
   1.218 +   * Positive infinity has all exponent bits set, sign bit set to 0, and no
   1.219 +   * significand.
   1.220 +   */
   1.221 +  typedef FloatingPoint<T> Traits;
   1.222 +  return BitwiseCast<T>(Traits::ExponentBits);
   1.223 +}
   1.224 +
   1.225 +/** Returns -Infinity. */
   1.226 +template<typename T>
   1.227 +static MOZ_ALWAYS_INLINE T
   1.228 +NegativeInfinity()
   1.229 +{
   1.230 +  /*
   1.231 +   * Negative infinity has all exponent bits set, sign bit set to 1, and no
   1.232 +   * significand.
   1.233 +   */
   1.234 +  typedef FloatingPoint<T> Traits;
   1.235 +  return BitwiseCast<T>(Traits::SignBit | Traits::ExponentBits);
   1.236 +}
   1.237 +
   1.238 +
   1.239 +/** Constructs a NaN value with the specified sign bit and significand bits. */
   1.240 +template<typename T>
   1.241 +static MOZ_ALWAYS_INLINE T
   1.242 +SpecificNaN(int signbit, typename FloatingPoint<T>::Bits significand)
   1.243 +{
   1.244 +  typedef FloatingPoint<T> Traits;
   1.245 +  MOZ_ASSERT(signbit == 0 || signbit == 1);
   1.246 +  MOZ_ASSERT((significand & ~Traits::SignificandBits) == 0);
   1.247 +  MOZ_ASSERT(significand & Traits::SignificandBits);
   1.248 +
   1.249 +  T t = BitwiseCast<T>((signbit ? Traits::SignBit : 0) |
   1.250 +                       Traits::ExponentBits |
   1.251 +                       significand);
   1.252 +  MOZ_ASSERT(IsNaN(t));
   1.253 +  return t;
   1.254 +}
   1.255 +
   1.256 +/** Computes the smallest non-zero positive float/double value. */
   1.257 +template<typename T>
   1.258 +static MOZ_ALWAYS_INLINE T
   1.259 +MinNumberValue()
   1.260 +{
   1.261 +  typedef FloatingPoint<T> Traits;
   1.262 +  typedef typename Traits::Bits Bits;
   1.263 +  return BitwiseCast<T>(Bits(1));
   1.264 +}
   1.265 +
   1.266 +/**
   1.267 + * If t is equal to some int32_t value, set *i to that value and return true;
   1.268 + * otherwise return false.
   1.269 + *
   1.270 + * Note that negative zero is "equal" to zero here. To test whether a value can
   1.271 + * be losslessly converted to int32_t and back, use NumberIsInt32 instead.
   1.272 + */
   1.273 +template<typename T>
   1.274 +static MOZ_ALWAYS_INLINE bool
   1.275 +NumberEqualsInt32(T t, int32_t* i)
   1.276 +{
   1.277 +  /*
   1.278 +   * XXX Casting a floating-point value that doesn't truncate to int32_t, to
   1.279 +   *     int32_t, induces undefined behavior.  We should definitely fix this
   1.280 +   *     (bug 744965), but as apparently it "works" in practice, it's not a
   1.281 +   *     pressing concern now.
   1.282 +   */
   1.283 +  return t == (*i = int32_t(t));
   1.284 +}
   1.285 +
   1.286 +/**
   1.287 + * If d can be converted to int32_t and back to an identical double value,
   1.288 + * set *i to that value and return true; otherwise return false.
   1.289 + *
   1.290 + * The difference between this and NumberEqualsInt32 is that this method returns
   1.291 + * false for negative zero.
   1.292 + */
   1.293 +template<typename T>
   1.294 +static MOZ_ALWAYS_INLINE bool
   1.295 +NumberIsInt32(T t, int32_t* i)
   1.296 +{
   1.297 +  return !IsNegativeZero(t) && NumberEqualsInt32(t, i);
   1.298 +}
   1.299 +
   1.300 +/**
   1.301 + * Computes a NaN value.  Do not use this method if you depend upon a particular
   1.302 + * NaN value being returned.
   1.303 + */
   1.304 +template<typename T>
   1.305 +static MOZ_ALWAYS_INLINE T
   1.306 +UnspecifiedNaN()
   1.307 +{
   1.308 +  /*
   1.309 +   * If we can use any quiet NaN, we might as well use the all-ones NaN,
   1.310 +   * since it's cheap to materialize on common platforms (such as x64, where
   1.311 +   * this value can be represented in a 32-bit signed immediate field, allowing
   1.312 +   * it to be stored to memory in a single instruction).
   1.313 +   */
   1.314 +  typedef FloatingPoint<T> Traits;
   1.315 +  return SpecificNaN<T>(1, Traits::SignificandBits);
   1.316 +}
   1.317 +
   1.318 +/**
   1.319 + * Compare two doubles for equality, *without* equating -0 to +0, and equating
   1.320 + * any NaN value to any other NaN value.  (The normal equality operators equate
   1.321 + * -0 with +0, and they equate NaN to no other value.)
   1.322 + */
   1.323 +template<typename T>
   1.324 +static inline bool
   1.325 +NumbersAreIdentical(T t1, T t2)
   1.326 +{
   1.327 +  typedef FloatingPoint<T> Traits;
   1.328 +  typedef typename Traits::Bits Bits;
   1.329 +  if (IsNaN(t1))
   1.330 +    return IsNaN(t2);
   1.331 +  return BitwiseCast<Bits>(t1) == BitwiseCast<Bits>(t2);
   1.332 +}
   1.333 +
   1.334 +namespace detail {
   1.335 +
   1.336 +template<typename T>
   1.337 +struct FuzzyEqualsEpsilon;
   1.338 +
   1.339 +template<>
   1.340 +struct FuzzyEqualsEpsilon<float>
   1.341 +{
   1.342 +  // A number near 1e-5 that is exactly representable in
   1.343 +  // floating point
   1.344 +  static const float value() { return 1.0f / (1 << 17); }
   1.345 +};
   1.346 +
   1.347 +template<>
   1.348 +struct FuzzyEqualsEpsilon<double>
   1.349 +{
   1.350 +  // A number near 1e-12 that is exactly representable in
   1.351 +  // a double
   1.352 +  static const double value() { return 1.0 / (1LL << 40); }
   1.353 +};
   1.354 +
   1.355 +} // namespace detail
   1.356 +
   1.357 +/**
   1.358 + * Compare two floating point values for equality, modulo rounding error. That
   1.359 + * is, the two values are considered equal if they are both not NaN and if they
   1.360 + * are less than or equal to epsilon apart. The default value of epsilon is near
   1.361 + * 1e-5.
   1.362 + *
   1.363 + * For most scenarios you will want to use FuzzyEqualsMultiplicative instead,
   1.364 + * as it is more reasonable over the entire range of floating point numbers.
   1.365 + * This additive version should only be used if you know the range of the numbers
   1.366 + * you are dealing with is bounded and stays around the same order of magnitude.
   1.367 + */
   1.368 +template<typename T>
   1.369 +static MOZ_ALWAYS_INLINE bool
   1.370 +FuzzyEqualsAdditive(T val1, T val2, T epsilon = detail::FuzzyEqualsEpsilon<T>::value())
   1.371 +{
   1.372 +  static_assert(IsFloatingPoint<T>::value, "floating point type required");
   1.373 +  return Abs(val1 - val2) <= epsilon;
   1.374 +}
   1.375 +
   1.376 +/**
   1.377 + * Compare two floating point values for equality, allowing for rounding error
   1.378 + * relative to the magnitude of the values. That is, the two values are
   1.379 + * considered equal if they are both not NaN and they are less than or equal to
   1.380 + * some epsilon apart, where the epsilon is scaled by the smaller of the two
   1.381 + * argument values.
   1.382 + *
   1.383 + * In most cases you will want to use this rather than FuzzyEqualsAdditive, as
   1.384 + * this function effectively masks out differences in the bottom few bits of
   1.385 + * the floating point numbers being compared, regardless of what order of magnitude
   1.386 + * those numbers are at.
   1.387 + */
   1.388 +template<typename T>
   1.389 +static MOZ_ALWAYS_INLINE bool
   1.390 +FuzzyEqualsMultiplicative(T val1, T val2, T epsilon = detail::FuzzyEqualsEpsilon<T>::value())
   1.391 +{
   1.392 +  static_assert(IsFloatingPoint<T>::value, "floating point type required");
   1.393 +  // can't use std::min because of bug 965340
   1.394 +  T smaller = Abs(val1) < Abs(val2) ? Abs(val1) : Abs(val2);
   1.395 +  return Abs(val1 - val2) <= epsilon * smaller;
   1.396 +}
   1.397 +
   1.398 +/**
   1.399 + * Returns true if the given value can be losslessly represented as an IEEE-754
   1.400 + * single format number, false otherwise.  All NaN values are considered
   1.401 + * representable (notwithstanding that the exact bit pattern of a double format
   1.402 + * NaN value can't be exactly represented in single format).
   1.403 + *
   1.404 + * This function isn't inlined to avoid buggy optimizations by MSVC.
   1.405 + */
   1.406 +MOZ_WARN_UNUSED_RESULT
   1.407 +extern MFBT_API bool
   1.408 +IsFloat32Representable(double x);
   1.409 +
   1.410 +} /* namespace mozilla */
   1.411 +
   1.412 +#endif /* mozilla_FloatingPoint_h */

mercurial