mfbt/HashFunctions.h

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
     3 /* This Source Code Form is subject to the terms of the Mozilla Public
     4  * License, v. 2.0. If a copy of the MPL was not distributed with this
     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 /* Utilities for hashing. */
     9 /*
    10  * This file exports functions for hashing data down to a 32-bit value,
    11  * including:
    12  *
    13  *  - HashString    Hash a char* or uint16_t/wchar_t* of known or unknown
    14  *                  length.
    15  *
    16  *  - HashBytes     Hash a byte array of known length.
    17  *
    18  *  - HashGeneric   Hash one or more values.  Currently, we support uint32_t,
    19  *                  types which can be implicitly cast to uint32_t, data
    20  *                  pointers, and function pointers.
    21  *
    22  *  - AddToHash     Add one or more values to the given hash.  This supports the
    23  *                  same list of types as HashGeneric.
    24  *
    25  *
    26  * You can chain these functions together to hash complex objects.  For example:
    27  *
    28  *  class ComplexObject
    29  *  {
    30  *      char* str;
    31  *      uint32_t uint1, uint2;
    32  *      void (*callbackFn)();
    33  *
    34  *    public:
    35  *      uint32_t hash() {
    36  *        uint32_t hash = HashString(str);
    37  *        hash = AddToHash(hash, uint1, uint2);
    38  *        return AddToHash(hash, callbackFn);
    39  *      }
    40  *  };
    41  *
    42  * If you want to hash an nsAString or nsACString, use the HashString functions
    43  * in nsHashKeys.h.
    44  */
    46 #ifndef mozilla_HashFunctions_h
    47 #define mozilla_HashFunctions_h
    49 #include "mozilla/Assertions.h"
    50 #include "mozilla/Attributes.h"
    51 #include "mozilla/Char16.h"
    52 #include "mozilla/Types.h"
    54 #include <stdint.h>
    56 #ifdef __cplusplus
    57 namespace mozilla {
    59 /**
    60  * The golden ratio as a 32-bit fixed-point value.
    61  */
    62 static const uint32_t GoldenRatioU32 = 0x9E3779B9U;
    64 inline uint32_t
    65 RotateBitsLeft32(uint32_t value, uint8_t bits)
    66 {
    67   MOZ_ASSERT(bits < 32);
    68   return (value << bits) | (value >> (32 - bits));
    69 }
    71 namespace detail {
    73 inline uint32_t
    74 AddU32ToHash(uint32_t hash, uint32_t value)
    75 {
    76   /*
    77    * This is the meat of all our hash routines.  This hash function is not
    78    * particularly sophisticated, but it seems to work well for our mostly
    79    * plain-text inputs.  Implementation notes follow.
    80    *
    81    * Our use of the golden ratio here is arbitrary; we could pick almost any
    82    * number which:
    83    *
    84    *  * is odd (because otherwise, all our hash values will be even)
    85    *
    86    *  * has a reasonably-even mix of 1's and 0's (consider the extreme case
    87    *    where we multiply by 0x3 or 0xeffffff -- this will not produce good
    88    *    mixing across all bits of the hash).
    89    *
    90    * The rotation length of 5 is also arbitrary, although an odd number is again
    91    * preferable so our hash explores the whole universe of possible rotations.
    92    *
    93    * Finally, we multiply by the golden ratio *after* xor'ing, not before.
    94    * Otherwise, if |hash| is 0 (as it often is for the beginning of a message),
    95    * the expression
    96    *
    97    *   (GoldenRatioU32 * RotateBitsLeft(hash, 5)) |xor| value
    98    *
    99    * evaluates to |value|.
   100    *
   101    * (Number-theoretic aside: Because any odd number |m| is relatively prime to
   102    * our modulus (2^32), the list
   103    *
   104    *    [x * m (mod 2^32) for 0 <= x < 2^32]
   105    *
   106    * has no duplicate elements.  This means that multiplying by |m| does not
   107    * cause us to skip any possible hash values.
   108    *
   109    * It's also nice if |m| has large-ish order mod 2^32 -- that is, if the
   110    * smallest k such that m^k == 1 (mod 2^32) is large -- so we can safely
   111    * multiply our hash value by |m| a few times without negating the
   112    * multiplicative effect.  Our golden ratio constant has order 2^29, which is
   113    * more than enough for our purposes.)
   114    */
   115   return GoldenRatioU32 * (RotateBitsLeft32(hash, 5) ^ value);
   116 }
   118 /**
   119  * AddUintptrToHash takes sizeof(uintptr_t) as a template parameter.
   120  */
   121 template<size_t PtrSize>
   122 inline uint32_t
   123 AddUintptrToHash(uint32_t hash, uintptr_t value);
   125 template<>
   126 inline uint32_t
   127 AddUintptrToHash<4>(uint32_t hash, uintptr_t value)
   128 {
   129   return AddU32ToHash(hash, static_cast<uint32_t>(value));
   130 }
   132 template<>
   133 inline uint32_t
   134 AddUintptrToHash<8>(uint32_t hash, uintptr_t value)
   135 {
   136   /*
   137    * The static cast to uint64_t below is necessary because this function
   138    * sometimes gets compiled on 32-bit platforms (yes, even though it's a
   139    * template and we never call this particular override in a 32-bit build).  If
   140    * we do value >> 32 on a 32-bit machine, we're shifting a 32-bit uintptr_t
   141    * right 32 bits, and the compiler throws an error.
   142    */
   143   uint32_t v1 = static_cast<uint32_t>(value);
   144   uint32_t v2 = static_cast<uint32_t>(static_cast<uint64_t>(value) >> 32);
   145   return AddU32ToHash(AddU32ToHash(hash, v1), v2);
   146 }
   148 } /* namespace detail */
   150 /**
   151  * AddToHash takes a hash and some values and returns a new hash based on the
   152  * inputs.
   153  *
   154  * Currently, we support hashing uint32_t's, values which we can implicitly
   155  * convert to uint32_t, data pointers, and function pointers.
   156  */
   157 template<typename A>
   158 MOZ_WARN_UNUSED_RESULT
   159 inline uint32_t
   160 AddToHash(uint32_t hash, A a)
   161 {
   162   /*
   163    * Try to convert |A| to uint32_t implicitly.  If this works, great.  If not,
   164    * we'll error out.
   165    */
   166   return detail::AddU32ToHash(hash, a);
   167 }
   169 template<typename A>
   170 MOZ_WARN_UNUSED_RESULT
   171 inline uint32_t
   172 AddToHash(uint32_t hash, A* a)
   173 {
   174   /*
   175    * You might think this function should just take a void*.  But then we'd only
   176    * catch data pointers and couldn't handle function pointers.
   177    */
   179   static_assert(sizeof(a) == sizeof(uintptr_t),
   180                 "Strange pointer!");
   182   return detail::AddUintptrToHash<sizeof(uintptr_t)>(hash, uintptr_t(a));
   183 }
   185 template<>
   186 MOZ_WARN_UNUSED_RESULT
   187 inline uint32_t
   188 AddToHash(uint32_t hash, uintptr_t a)
   189 {
   190   return detail::AddUintptrToHash<sizeof(uintptr_t)>(hash, a);
   191 }
   193 template<typename A, typename B>
   194 MOZ_WARN_UNUSED_RESULT
   195 uint32_t
   196 AddToHash(uint32_t hash, A a, B b)
   197 {
   198   return AddToHash(AddToHash(hash, a), b);
   199 }
   201 template<typename A, typename B, typename C>
   202 MOZ_WARN_UNUSED_RESULT
   203 uint32_t
   204 AddToHash(uint32_t hash, A a, B b, C c)
   205 {
   206   return AddToHash(AddToHash(hash, a, b), c);
   207 }
   209 template<typename A, typename B, typename C, typename D>
   210 MOZ_WARN_UNUSED_RESULT
   211 uint32_t
   212 AddToHash(uint32_t hash, A a, B b, C c, D d)
   213 {
   214   return AddToHash(AddToHash(hash, a, b, c), d);
   215 }
   217 template<typename A, typename B, typename C, typename D, typename E>
   218 MOZ_WARN_UNUSED_RESULT
   219 uint32_t
   220 AddToHash(uint32_t hash, A a, B b, C c, D d, E e)
   221 {
   222   return AddToHash(AddToHash(hash, a, b, c, d), e);
   223 }
   225 /**
   226  * The HashGeneric class of functions let you hash one or more values.
   227  *
   228  * If you want to hash together two values x and y, calling HashGeneric(x, y) is
   229  * much better than calling AddToHash(x, y), because AddToHash(x, y) assumes
   230  * that x has already been hashed.
   231  */
   232 template<typename A>
   233 MOZ_WARN_UNUSED_RESULT
   234 inline uint32_t
   235 HashGeneric(A a)
   236 {
   237   return AddToHash(0, a);
   238 }
   240 template<typename A, typename B>
   241 MOZ_WARN_UNUSED_RESULT
   242 inline uint32_t
   243 HashGeneric(A a, B b)
   244 {
   245   return AddToHash(0, a, b);
   246 }
   248 template<typename A, typename B, typename C>
   249 MOZ_WARN_UNUSED_RESULT
   250 inline uint32_t
   251 HashGeneric(A a, B b, C c)
   252 {
   253   return AddToHash(0, a, b, c);
   254 }
   256 template<typename A, typename B, typename C, typename D>
   257 MOZ_WARN_UNUSED_RESULT
   258 inline uint32_t
   259 HashGeneric(A a, B b, C c, D d)
   260 {
   261   return AddToHash(0, a, b, c, d);
   262 }
   264 template<typename A, typename B, typename C, typename D, typename E>
   265 MOZ_WARN_UNUSED_RESULT
   266 inline uint32_t
   267 HashGeneric(A a, B b, C c, D d, E e)
   268 {
   269   return AddToHash(0, a, b, c, d, e);
   270 }
   272 namespace detail {
   274 template<typename T>
   275 uint32_t
   276 HashUntilZero(const T* str)
   277 {
   278   uint32_t hash = 0;
   279   for (T c; (c = *str); str++)
   280     hash = AddToHash(hash, c);
   281   return hash;
   282 }
   284 template<typename T>
   285 uint32_t
   286 HashKnownLength(const T* str, size_t length)
   287 {
   288   uint32_t hash = 0;
   289   for (size_t i = 0; i < length; i++)
   290     hash = AddToHash(hash, str[i]);
   291   return hash;
   292 }
   294 } /* namespace detail */
   296 /**
   297  * The HashString overloads below do just what you'd expect.
   298  *
   299  * If you have the string's length, you might as well call the overload which
   300  * includes the length.  It may be marginally faster.
   301  */
   302 MOZ_WARN_UNUSED_RESULT
   303 inline uint32_t
   304 HashString(const char* str)
   305 {
   306   return detail::HashUntilZero(str);
   307 }
   309 MOZ_WARN_UNUSED_RESULT
   310 inline uint32_t
   311 HashString(const char* str, size_t length)
   312 {
   313   return detail::HashKnownLength(str, length);
   314 }
   316 MOZ_WARN_UNUSED_RESULT
   317 inline uint32_t
   318 HashString(const uint16_t* str)
   319 {
   320   return detail::HashUntilZero(str);
   321 }
   323 MOZ_WARN_UNUSED_RESULT
   324 inline uint32_t
   325 HashString(const uint16_t* str, size_t length)
   326 {
   327   return detail::HashKnownLength(str, length);
   328 }
   330 #ifdef MOZ_CHAR16_IS_NOT_WCHAR
   331 MOZ_WARN_UNUSED_RESULT
   332 inline uint32_t
   333 HashString(const char16_t* str)
   334 {
   335   return detail::HashUntilZero(str);
   336 }
   338 MOZ_WARN_UNUSED_RESULT
   339 inline uint32_t
   340 HashString(const char16_t* str, size_t length)
   341 {
   342   return detail::HashKnownLength(str, length);
   343 }
   344 #endif
   346 /*
   347  * On Windows, wchar_t (char16_t) is not the same as uint16_t, even though it's
   348  * the same width!
   349  */
   350 #ifdef WIN32
   351 MOZ_WARN_UNUSED_RESULT
   352 inline uint32_t
   353 HashString(const wchar_t* str)
   354 {
   355   return detail::HashUntilZero(str);
   356 }
   358 MOZ_WARN_UNUSED_RESULT
   359 inline uint32_t
   360 HashString(const wchar_t* str, size_t length)
   361 {
   362   return detail::HashKnownLength(str, length);
   363 }
   364 #endif
   366 /**
   367  * Hash some number of bytes.
   368  *
   369  * This hash walks word-by-word, rather than byte-by-byte, so you won't get the
   370  * same result out of HashBytes as you would out of HashString.
   371  */
   372 MOZ_WARN_UNUSED_RESULT
   373 extern MFBT_API uint32_t
   374 HashBytes(const void* bytes, size_t length);
   376 } /* namespace mozilla */
   377 #endif /* __cplusplus */
   379 #endif /* mozilla_HashFunctions_h */

mercurial