mfbt/HashFunctions.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
michael@0 3 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 4 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 6
michael@0 7 /* Utilities for hashing. */
michael@0 8
michael@0 9 /*
michael@0 10 * This file exports functions for hashing data down to a 32-bit value,
michael@0 11 * including:
michael@0 12 *
michael@0 13 * - HashString Hash a char* or uint16_t/wchar_t* of known or unknown
michael@0 14 * length.
michael@0 15 *
michael@0 16 * - HashBytes Hash a byte array of known length.
michael@0 17 *
michael@0 18 * - HashGeneric Hash one or more values. Currently, we support uint32_t,
michael@0 19 * types which can be implicitly cast to uint32_t, data
michael@0 20 * pointers, and function pointers.
michael@0 21 *
michael@0 22 * - AddToHash Add one or more values to the given hash. This supports the
michael@0 23 * same list of types as HashGeneric.
michael@0 24 *
michael@0 25 *
michael@0 26 * You can chain these functions together to hash complex objects. For example:
michael@0 27 *
michael@0 28 * class ComplexObject
michael@0 29 * {
michael@0 30 * char* str;
michael@0 31 * uint32_t uint1, uint2;
michael@0 32 * void (*callbackFn)();
michael@0 33 *
michael@0 34 * public:
michael@0 35 * uint32_t hash() {
michael@0 36 * uint32_t hash = HashString(str);
michael@0 37 * hash = AddToHash(hash, uint1, uint2);
michael@0 38 * return AddToHash(hash, callbackFn);
michael@0 39 * }
michael@0 40 * };
michael@0 41 *
michael@0 42 * If you want to hash an nsAString or nsACString, use the HashString functions
michael@0 43 * in nsHashKeys.h.
michael@0 44 */
michael@0 45
michael@0 46 #ifndef mozilla_HashFunctions_h
michael@0 47 #define mozilla_HashFunctions_h
michael@0 48
michael@0 49 #include "mozilla/Assertions.h"
michael@0 50 #include "mozilla/Attributes.h"
michael@0 51 #include "mozilla/Char16.h"
michael@0 52 #include "mozilla/Types.h"
michael@0 53
michael@0 54 #include <stdint.h>
michael@0 55
michael@0 56 #ifdef __cplusplus
michael@0 57 namespace mozilla {
michael@0 58
michael@0 59 /**
michael@0 60 * The golden ratio as a 32-bit fixed-point value.
michael@0 61 */
michael@0 62 static const uint32_t GoldenRatioU32 = 0x9E3779B9U;
michael@0 63
michael@0 64 inline uint32_t
michael@0 65 RotateBitsLeft32(uint32_t value, uint8_t bits)
michael@0 66 {
michael@0 67 MOZ_ASSERT(bits < 32);
michael@0 68 return (value << bits) | (value >> (32 - bits));
michael@0 69 }
michael@0 70
michael@0 71 namespace detail {
michael@0 72
michael@0 73 inline uint32_t
michael@0 74 AddU32ToHash(uint32_t hash, uint32_t value)
michael@0 75 {
michael@0 76 /*
michael@0 77 * This is the meat of all our hash routines. This hash function is not
michael@0 78 * particularly sophisticated, but it seems to work well for our mostly
michael@0 79 * plain-text inputs. Implementation notes follow.
michael@0 80 *
michael@0 81 * Our use of the golden ratio here is arbitrary; we could pick almost any
michael@0 82 * number which:
michael@0 83 *
michael@0 84 * * is odd (because otherwise, all our hash values will be even)
michael@0 85 *
michael@0 86 * * has a reasonably-even mix of 1's and 0's (consider the extreme case
michael@0 87 * where we multiply by 0x3 or 0xeffffff -- this will not produce good
michael@0 88 * mixing across all bits of the hash).
michael@0 89 *
michael@0 90 * The rotation length of 5 is also arbitrary, although an odd number is again
michael@0 91 * preferable so our hash explores the whole universe of possible rotations.
michael@0 92 *
michael@0 93 * Finally, we multiply by the golden ratio *after* xor'ing, not before.
michael@0 94 * Otherwise, if |hash| is 0 (as it often is for the beginning of a message),
michael@0 95 * the expression
michael@0 96 *
michael@0 97 * (GoldenRatioU32 * RotateBitsLeft(hash, 5)) |xor| value
michael@0 98 *
michael@0 99 * evaluates to |value|.
michael@0 100 *
michael@0 101 * (Number-theoretic aside: Because any odd number |m| is relatively prime to
michael@0 102 * our modulus (2^32), the list
michael@0 103 *
michael@0 104 * [x * m (mod 2^32) for 0 <= x < 2^32]
michael@0 105 *
michael@0 106 * has no duplicate elements. This means that multiplying by |m| does not
michael@0 107 * cause us to skip any possible hash values.
michael@0 108 *
michael@0 109 * It's also nice if |m| has large-ish order mod 2^32 -- that is, if the
michael@0 110 * smallest k such that m^k == 1 (mod 2^32) is large -- so we can safely
michael@0 111 * multiply our hash value by |m| a few times without negating the
michael@0 112 * multiplicative effect. Our golden ratio constant has order 2^29, which is
michael@0 113 * more than enough for our purposes.)
michael@0 114 */
michael@0 115 return GoldenRatioU32 * (RotateBitsLeft32(hash, 5) ^ value);
michael@0 116 }
michael@0 117
michael@0 118 /**
michael@0 119 * AddUintptrToHash takes sizeof(uintptr_t) as a template parameter.
michael@0 120 */
michael@0 121 template<size_t PtrSize>
michael@0 122 inline uint32_t
michael@0 123 AddUintptrToHash(uint32_t hash, uintptr_t value);
michael@0 124
michael@0 125 template<>
michael@0 126 inline uint32_t
michael@0 127 AddUintptrToHash<4>(uint32_t hash, uintptr_t value)
michael@0 128 {
michael@0 129 return AddU32ToHash(hash, static_cast<uint32_t>(value));
michael@0 130 }
michael@0 131
michael@0 132 template<>
michael@0 133 inline uint32_t
michael@0 134 AddUintptrToHash<8>(uint32_t hash, uintptr_t value)
michael@0 135 {
michael@0 136 /*
michael@0 137 * The static cast to uint64_t below is necessary because this function
michael@0 138 * sometimes gets compiled on 32-bit platforms (yes, even though it's a
michael@0 139 * template and we never call this particular override in a 32-bit build). If
michael@0 140 * we do value >> 32 on a 32-bit machine, we're shifting a 32-bit uintptr_t
michael@0 141 * right 32 bits, and the compiler throws an error.
michael@0 142 */
michael@0 143 uint32_t v1 = static_cast<uint32_t>(value);
michael@0 144 uint32_t v2 = static_cast<uint32_t>(static_cast<uint64_t>(value) >> 32);
michael@0 145 return AddU32ToHash(AddU32ToHash(hash, v1), v2);
michael@0 146 }
michael@0 147
michael@0 148 } /* namespace detail */
michael@0 149
michael@0 150 /**
michael@0 151 * AddToHash takes a hash and some values and returns a new hash based on the
michael@0 152 * inputs.
michael@0 153 *
michael@0 154 * Currently, we support hashing uint32_t's, values which we can implicitly
michael@0 155 * convert to uint32_t, data pointers, and function pointers.
michael@0 156 */
michael@0 157 template<typename A>
michael@0 158 MOZ_WARN_UNUSED_RESULT
michael@0 159 inline uint32_t
michael@0 160 AddToHash(uint32_t hash, A a)
michael@0 161 {
michael@0 162 /*
michael@0 163 * Try to convert |A| to uint32_t implicitly. If this works, great. If not,
michael@0 164 * we'll error out.
michael@0 165 */
michael@0 166 return detail::AddU32ToHash(hash, a);
michael@0 167 }
michael@0 168
michael@0 169 template<typename A>
michael@0 170 MOZ_WARN_UNUSED_RESULT
michael@0 171 inline uint32_t
michael@0 172 AddToHash(uint32_t hash, A* a)
michael@0 173 {
michael@0 174 /*
michael@0 175 * You might think this function should just take a void*. But then we'd only
michael@0 176 * catch data pointers and couldn't handle function pointers.
michael@0 177 */
michael@0 178
michael@0 179 static_assert(sizeof(a) == sizeof(uintptr_t),
michael@0 180 "Strange pointer!");
michael@0 181
michael@0 182 return detail::AddUintptrToHash<sizeof(uintptr_t)>(hash, uintptr_t(a));
michael@0 183 }
michael@0 184
michael@0 185 template<>
michael@0 186 MOZ_WARN_UNUSED_RESULT
michael@0 187 inline uint32_t
michael@0 188 AddToHash(uint32_t hash, uintptr_t a)
michael@0 189 {
michael@0 190 return detail::AddUintptrToHash<sizeof(uintptr_t)>(hash, a);
michael@0 191 }
michael@0 192
michael@0 193 template<typename A, typename B>
michael@0 194 MOZ_WARN_UNUSED_RESULT
michael@0 195 uint32_t
michael@0 196 AddToHash(uint32_t hash, A a, B b)
michael@0 197 {
michael@0 198 return AddToHash(AddToHash(hash, a), b);
michael@0 199 }
michael@0 200
michael@0 201 template<typename A, typename B, typename C>
michael@0 202 MOZ_WARN_UNUSED_RESULT
michael@0 203 uint32_t
michael@0 204 AddToHash(uint32_t hash, A a, B b, C c)
michael@0 205 {
michael@0 206 return AddToHash(AddToHash(hash, a, b), c);
michael@0 207 }
michael@0 208
michael@0 209 template<typename A, typename B, typename C, typename D>
michael@0 210 MOZ_WARN_UNUSED_RESULT
michael@0 211 uint32_t
michael@0 212 AddToHash(uint32_t hash, A a, B b, C c, D d)
michael@0 213 {
michael@0 214 return AddToHash(AddToHash(hash, a, b, c), d);
michael@0 215 }
michael@0 216
michael@0 217 template<typename A, typename B, typename C, typename D, typename E>
michael@0 218 MOZ_WARN_UNUSED_RESULT
michael@0 219 uint32_t
michael@0 220 AddToHash(uint32_t hash, A a, B b, C c, D d, E e)
michael@0 221 {
michael@0 222 return AddToHash(AddToHash(hash, a, b, c, d), e);
michael@0 223 }
michael@0 224
michael@0 225 /**
michael@0 226 * The HashGeneric class of functions let you hash one or more values.
michael@0 227 *
michael@0 228 * If you want to hash together two values x and y, calling HashGeneric(x, y) is
michael@0 229 * much better than calling AddToHash(x, y), because AddToHash(x, y) assumes
michael@0 230 * that x has already been hashed.
michael@0 231 */
michael@0 232 template<typename A>
michael@0 233 MOZ_WARN_UNUSED_RESULT
michael@0 234 inline uint32_t
michael@0 235 HashGeneric(A a)
michael@0 236 {
michael@0 237 return AddToHash(0, a);
michael@0 238 }
michael@0 239
michael@0 240 template<typename A, typename B>
michael@0 241 MOZ_WARN_UNUSED_RESULT
michael@0 242 inline uint32_t
michael@0 243 HashGeneric(A a, B b)
michael@0 244 {
michael@0 245 return AddToHash(0, a, b);
michael@0 246 }
michael@0 247
michael@0 248 template<typename A, typename B, typename C>
michael@0 249 MOZ_WARN_UNUSED_RESULT
michael@0 250 inline uint32_t
michael@0 251 HashGeneric(A a, B b, C c)
michael@0 252 {
michael@0 253 return AddToHash(0, a, b, c);
michael@0 254 }
michael@0 255
michael@0 256 template<typename A, typename B, typename C, typename D>
michael@0 257 MOZ_WARN_UNUSED_RESULT
michael@0 258 inline uint32_t
michael@0 259 HashGeneric(A a, B b, C c, D d)
michael@0 260 {
michael@0 261 return AddToHash(0, a, b, c, d);
michael@0 262 }
michael@0 263
michael@0 264 template<typename A, typename B, typename C, typename D, typename E>
michael@0 265 MOZ_WARN_UNUSED_RESULT
michael@0 266 inline uint32_t
michael@0 267 HashGeneric(A a, B b, C c, D d, E e)
michael@0 268 {
michael@0 269 return AddToHash(0, a, b, c, d, e);
michael@0 270 }
michael@0 271
michael@0 272 namespace detail {
michael@0 273
michael@0 274 template<typename T>
michael@0 275 uint32_t
michael@0 276 HashUntilZero(const T* str)
michael@0 277 {
michael@0 278 uint32_t hash = 0;
michael@0 279 for (T c; (c = *str); str++)
michael@0 280 hash = AddToHash(hash, c);
michael@0 281 return hash;
michael@0 282 }
michael@0 283
michael@0 284 template<typename T>
michael@0 285 uint32_t
michael@0 286 HashKnownLength(const T* str, size_t length)
michael@0 287 {
michael@0 288 uint32_t hash = 0;
michael@0 289 for (size_t i = 0; i < length; i++)
michael@0 290 hash = AddToHash(hash, str[i]);
michael@0 291 return hash;
michael@0 292 }
michael@0 293
michael@0 294 } /* namespace detail */
michael@0 295
michael@0 296 /**
michael@0 297 * The HashString overloads below do just what you'd expect.
michael@0 298 *
michael@0 299 * If you have the string's length, you might as well call the overload which
michael@0 300 * includes the length. It may be marginally faster.
michael@0 301 */
michael@0 302 MOZ_WARN_UNUSED_RESULT
michael@0 303 inline uint32_t
michael@0 304 HashString(const char* str)
michael@0 305 {
michael@0 306 return detail::HashUntilZero(str);
michael@0 307 }
michael@0 308
michael@0 309 MOZ_WARN_UNUSED_RESULT
michael@0 310 inline uint32_t
michael@0 311 HashString(const char* str, size_t length)
michael@0 312 {
michael@0 313 return detail::HashKnownLength(str, length);
michael@0 314 }
michael@0 315
michael@0 316 MOZ_WARN_UNUSED_RESULT
michael@0 317 inline uint32_t
michael@0 318 HashString(const uint16_t* str)
michael@0 319 {
michael@0 320 return detail::HashUntilZero(str);
michael@0 321 }
michael@0 322
michael@0 323 MOZ_WARN_UNUSED_RESULT
michael@0 324 inline uint32_t
michael@0 325 HashString(const uint16_t* str, size_t length)
michael@0 326 {
michael@0 327 return detail::HashKnownLength(str, length);
michael@0 328 }
michael@0 329
michael@0 330 #ifdef MOZ_CHAR16_IS_NOT_WCHAR
michael@0 331 MOZ_WARN_UNUSED_RESULT
michael@0 332 inline uint32_t
michael@0 333 HashString(const char16_t* str)
michael@0 334 {
michael@0 335 return detail::HashUntilZero(str);
michael@0 336 }
michael@0 337
michael@0 338 MOZ_WARN_UNUSED_RESULT
michael@0 339 inline uint32_t
michael@0 340 HashString(const char16_t* str, size_t length)
michael@0 341 {
michael@0 342 return detail::HashKnownLength(str, length);
michael@0 343 }
michael@0 344 #endif
michael@0 345
michael@0 346 /*
michael@0 347 * On Windows, wchar_t (char16_t) is not the same as uint16_t, even though it's
michael@0 348 * the same width!
michael@0 349 */
michael@0 350 #ifdef WIN32
michael@0 351 MOZ_WARN_UNUSED_RESULT
michael@0 352 inline uint32_t
michael@0 353 HashString(const wchar_t* str)
michael@0 354 {
michael@0 355 return detail::HashUntilZero(str);
michael@0 356 }
michael@0 357
michael@0 358 MOZ_WARN_UNUSED_RESULT
michael@0 359 inline uint32_t
michael@0 360 HashString(const wchar_t* str, size_t length)
michael@0 361 {
michael@0 362 return detail::HashKnownLength(str, length);
michael@0 363 }
michael@0 364 #endif
michael@0 365
michael@0 366 /**
michael@0 367 * Hash some number of bytes.
michael@0 368 *
michael@0 369 * This hash walks word-by-word, rather than byte-by-byte, so you won't get the
michael@0 370 * same result out of HashBytes as you would out of HashString.
michael@0 371 */
michael@0 372 MOZ_WARN_UNUSED_RESULT
michael@0 373 extern MFBT_API uint32_t
michael@0 374 HashBytes(const void* bytes, size_t length);
michael@0 375
michael@0 376 } /* namespace mozilla */
michael@0 377 #endif /* __cplusplus */
michael@0 378
michael@0 379 #endif /* mozilla_HashFunctions_h */

mercurial