Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
michael@0 | 3 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 5 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 6 | |
michael@0 | 7 | /* Utilities for hashing. */ |
michael@0 | 8 | |
michael@0 | 9 | /* |
michael@0 | 10 | * This file exports functions for hashing data down to a 32-bit value, |
michael@0 | 11 | * including: |
michael@0 | 12 | * |
michael@0 | 13 | * - HashString Hash a char* or uint16_t/wchar_t* of known or unknown |
michael@0 | 14 | * length. |
michael@0 | 15 | * |
michael@0 | 16 | * - HashBytes Hash a byte array of known length. |
michael@0 | 17 | * |
michael@0 | 18 | * - HashGeneric Hash one or more values. Currently, we support uint32_t, |
michael@0 | 19 | * types which can be implicitly cast to uint32_t, data |
michael@0 | 20 | * pointers, and function pointers. |
michael@0 | 21 | * |
michael@0 | 22 | * - AddToHash Add one or more values to the given hash. This supports the |
michael@0 | 23 | * same list of types as HashGeneric. |
michael@0 | 24 | * |
michael@0 | 25 | * |
michael@0 | 26 | * You can chain these functions together to hash complex objects. For example: |
michael@0 | 27 | * |
michael@0 | 28 | * class ComplexObject |
michael@0 | 29 | * { |
michael@0 | 30 | * char* str; |
michael@0 | 31 | * uint32_t uint1, uint2; |
michael@0 | 32 | * void (*callbackFn)(); |
michael@0 | 33 | * |
michael@0 | 34 | * public: |
michael@0 | 35 | * uint32_t hash() { |
michael@0 | 36 | * uint32_t hash = HashString(str); |
michael@0 | 37 | * hash = AddToHash(hash, uint1, uint2); |
michael@0 | 38 | * return AddToHash(hash, callbackFn); |
michael@0 | 39 | * } |
michael@0 | 40 | * }; |
michael@0 | 41 | * |
michael@0 | 42 | * If you want to hash an nsAString or nsACString, use the HashString functions |
michael@0 | 43 | * in nsHashKeys.h. |
michael@0 | 44 | */ |
michael@0 | 45 | |
michael@0 | 46 | #ifndef mozilla_HashFunctions_h |
michael@0 | 47 | #define mozilla_HashFunctions_h |
michael@0 | 48 | |
michael@0 | 49 | #include "mozilla/Assertions.h" |
michael@0 | 50 | #include "mozilla/Attributes.h" |
michael@0 | 51 | #include "mozilla/Char16.h" |
michael@0 | 52 | #include "mozilla/Types.h" |
michael@0 | 53 | |
michael@0 | 54 | #include <stdint.h> |
michael@0 | 55 | |
michael@0 | 56 | #ifdef __cplusplus |
michael@0 | 57 | namespace mozilla { |
michael@0 | 58 | |
michael@0 | 59 | /** |
michael@0 | 60 | * The golden ratio as a 32-bit fixed-point value. |
michael@0 | 61 | */ |
michael@0 | 62 | static const uint32_t GoldenRatioU32 = 0x9E3779B9U; |
michael@0 | 63 | |
michael@0 | 64 | inline uint32_t |
michael@0 | 65 | RotateBitsLeft32(uint32_t value, uint8_t bits) |
michael@0 | 66 | { |
michael@0 | 67 | MOZ_ASSERT(bits < 32); |
michael@0 | 68 | return (value << bits) | (value >> (32 - bits)); |
michael@0 | 69 | } |
michael@0 | 70 | |
michael@0 | 71 | namespace detail { |
michael@0 | 72 | |
michael@0 | 73 | inline uint32_t |
michael@0 | 74 | AddU32ToHash(uint32_t hash, uint32_t value) |
michael@0 | 75 | { |
michael@0 | 76 | /* |
michael@0 | 77 | * This is the meat of all our hash routines. This hash function is not |
michael@0 | 78 | * particularly sophisticated, but it seems to work well for our mostly |
michael@0 | 79 | * plain-text inputs. Implementation notes follow. |
michael@0 | 80 | * |
michael@0 | 81 | * Our use of the golden ratio here is arbitrary; we could pick almost any |
michael@0 | 82 | * number which: |
michael@0 | 83 | * |
michael@0 | 84 | * * is odd (because otherwise, all our hash values will be even) |
michael@0 | 85 | * |
michael@0 | 86 | * * has a reasonably-even mix of 1's and 0's (consider the extreme case |
michael@0 | 87 | * where we multiply by 0x3 or 0xeffffff -- this will not produce good |
michael@0 | 88 | * mixing across all bits of the hash). |
michael@0 | 89 | * |
michael@0 | 90 | * The rotation length of 5 is also arbitrary, although an odd number is again |
michael@0 | 91 | * preferable so our hash explores the whole universe of possible rotations. |
michael@0 | 92 | * |
michael@0 | 93 | * Finally, we multiply by the golden ratio *after* xor'ing, not before. |
michael@0 | 94 | * Otherwise, if |hash| is 0 (as it often is for the beginning of a message), |
michael@0 | 95 | * the expression |
michael@0 | 96 | * |
michael@0 | 97 | * (GoldenRatioU32 * RotateBitsLeft(hash, 5)) |xor| value |
michael@0 | 98 | * |
michael@0 | 99 | * evaluates to |value|. |
michael@0 | 100 | * |
michael@0 | 101 | * (Number-theoretic aside: Because any odd number |m| is relatively prime to |
michael@0 | 102 | * our modulus (2^32), the list |
michael@0 | 103 | * |
michael@0 | 104 | * [x * m (mod 2^32) for 0 <= x < 2^32] |
michael@0 | 105 | * |
michael@0 | 106 | * has no duplicate elements. This means that multiplying by |m| does not |
michael@0 | 107 | * cause us to skip any possible hash values. |
michael@0 | 108 | * |
michael@0 | 109 | * It's also nice if |m| has large-ish order mod 2^32 -- that is, if the |
michael@0 | 110 | * smallest k such that m^k == 1 (mod 2^32) is large -- so we can safely |
michael@0 | 111 | * multiply our hash value by |m| a few times without negating the |
michael@0 | 112 | * multiplicative effect. Our golden ratio constant has order 2^29, which is |
michael@0 | 113 | * more than enough for our purposes.) |
michael@0 | 114 | */ |
michael@0 | 115 | return GoldenRatioU32 * (RotateBitsLeft32(hash, 5) ^ value); |
michael@0 | 116 | } |
michael@0 | 117 | |
michael@0 | 118 | /** |
michael@0 | 119 | * AddUintptrToHash takes sizeof(uintptr_t) as a template parameter. |
michael@0 | 120 | */ |
michael@0 | 121 | template<size_t PtrSize> |
michael@0 | 122 | inline uint32_t |
michael@0 | 123 | AddUintptrToHash(uint32_t hash, uintptr_t value); |
michael@0 | 124 | |
michael@0 | 125 | template<> |
michael@0 | 126 | inline uint32_t |
michael@0 | 127 | AddUintptrToHash<4>(uint32_t hash, uintptr_t value) |
michael@0 | 128 | { |
michael@0 | 129 | return AddU32ToHash(hash, static_cast<uint32_t>(value)); |
michael@0 | 130 | } |
michael@0 | 131 | |
michael@0 | 132 | template<> |
michael@0 | 133 | inline uint32_t |
michael@0 | 134 | AddUintptrToHash<8>(uint32_t hash, uintptr_t value) |
michael@0 | 135 | { |
michael@0 | 136 | /* |
michael@0 | 137 | * The static cast to uint64_t below is necessary because this function |
michael@0 | 138 | * sometimes gets compiled on 32-bit platforms (yes, even though it's a |
michael@0 | 139 | * template and we never call this particular override in a 32-bit build). If |
michael@0 | 140 | * we do value >> 32 on a 32-bit machine, we're shifting a 32-bit uintptr_t |
michael@0 | 141 | * right 32 bits, and the compiler throws an error. |
michael@0 | 142 | */ |
michael@0 | 143 | uint32_t v1 = static_cast<uint32_t>(value); |
michael@0 | 144 | uint32_t v2 = static_cast<uint32_t>(static_cast<uint64_t>(value) >> 32); |
michael@0 | 145 | return AddU32ToHash(AddU32ToHash(hash, v1), v2); |
michael@0 | 146 | } |
michael@0 | 147 | |
michael@0 | 148 | } /* namespace detail */ |
michael@0 | 149 | |
michael@0 | 150 | /** |
michael@0 | 151 | * AddToHash takes a hash and some values and returns a new hash based on the |
michael@0 | 152 | * inputs. |
michael@0 | 153 | * |
michael@0 | 154 | * Currently, we support hashing uint32_t's, values which we can implicitly |
michael@0 | 155 | * convert to uint32_t, data pointers, and function pointers. |
michael@0 | 156 | */ |
michael@0 | 157 | template<typename A> |
michael@0 | 158 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 159 | inline uint32_t |
michael@0 | 160 | AddToHash(uint32_t hash, A a) |
michael@0 | 161 | { |
michael@0 | 162 | /* |
michael@0 | 163 | * Try to convert |A| to uint32_t implicitly. If this works, great. If not, |
michael@0 | 164 | * we'll error out. |
michael@0 | 165 | */ |
michael@0 | 166 | return detail::AddU32ToHash(hash, a); |
michael@0 | 167 | } |
michael@0 | 168 | |
michael@0 | 169 | template<typename A> |
michael@0 | 170 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 171 | inline uint32_t |
michael@0 | 172 | AddToHash(uint32_t hash, A* a) |
michael@0 | 173 | { |
michael@0 | 174 | /* |
michael@0 | 175 | * You might think this function should just take a void*. But then we'd only |
michael@0 | 176 | * catch data pointers and couldn't handle function pointers. |
michael@0 | 177 | */ |
michael@0 | 178 | |
michael@0 | 179 | static_assert(sizeof(a) == sizeof(uintptr_t), |
michael@0 | 180 | "Strange pointer!"); |
michael@0 | 181 | |
michael@0 | 182 | return detail::AddUintptrToHash<sizeof(uintptr_t)>(hash, uintptr_t(a)); |
michael@0 | 183 | } |
michael@0 | 184 | |
michael@0 | 185 | template<> |
michael@0 | 186 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 187 | inline uint32_t |
michael@0 | 188 | AddToHash(uint32_t hash, uintptr_t a) |
michael@0 | 189 | { |
michael@0 | 190 | return detail::AddUintptrToHash<sizeof(uintptr_t)>(hash, a); |
michael@0 | 191 | } |
michael@0 | 192 | |
michael@0 | 193 | template<typename A, typename B> |
michael@0 | 194 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 195 | uint32_t |
michael@0 | 196 | AddToHash(uint32_t hash, A a, B b) |
michael@0 | 197 | { |
michael@0 | 198 | return AddToHash(AddToHash(hash, a), b); |
michael@0 | 199 | } |
michael@0 | 200 | |
michael@0 | 201 | template<typename A, typename B, typename C> |
michael@0 | 202 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 203 | uint32_t |
michael@0 | 204 | AddToHash(uint32_t hash, A a, B b, C c) |
michael@0 | 205 | { |
michael@0 | 206 | return AddToHash(AddToHash(hash, a, b), c); |
michael@0 | 207 | } |
michael@0 | 208 | |
michael@0 | 209 | template<typename A, typename B, typename C, typename D> |
michael@0 | 210 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 211 | uint32_t |
michael@0 | 212 | AddToHash(uint32_t hash, A a, B b, C c, D d) |
michael@0 | 213 | { |
michael@0 | 214 | return AddToHash(AddToHash(hash, a, b, c), d); |
michael@0 | 215 | } |
michael@0 | 216 | |
michael@0 | 217 | template<typename A, typename B, typename C, typename D, typename E> |
michael@0 | 218 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 219 | uint32_t |
michael@0 | 220 | AddToHash(uint32_t hash, A a, B b, C c, D d, E e) |
michael@0 | 221 | { |
michael@0 | 222 | return AddToHash(AddToHash(hash, a, b, c, d), e); |
michael@0 | 223 | } |
michael@0 | 224 | |
michael@0 | 225 | /** |
michael@0 | 226 | * The HashGeneric class of functions let you hash one or more values. |
michael@0 | 227 | * |
michael@0 | 228 | * If you want to hash together two values x and y, calling HashGeneric(x, y) is |
michael@0 | 229 | * much better than calling AddToHash(x, y), because AddToHash(x, y) assumes |
michael@0 | 230 | * that x has already been hashed. |
michael@0 | 231 | */ |
michael@0 | 232 | template<typename A> |
michael@0 | 233 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 234 | inline uint32_t |
michael@0 | 235 | HashGeneric(A a) |
michael@0 | 236 | { |
michael@0 | 237 | return AddToHash(0, a); |
michael@0 | 238 | } |
michael@0 | 239 | |
michael@0 | 240 | template<typename A, typename B> |
michael@0 | 241 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 242 | inline uint32_t |
michael@0 | 243 | HashGeneric(A a, B b) |
michael@0 | 244 | { |
michael@0 | 245 | return AddToHash(0, a, b); |
michael@0 | 246 | } |
michael@0 | 247 | |
michael@0 | 248 | template<typename A, typename B, typename C> |
michael@0 | 249 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 250 | inline uint32_t |
michael@0 | 251 | HashGeneric(A a, B b, C c) |
michael@0 | 252 | { |
michael@0 | 253 | return AddToHash(0, a, b, c); |
michael@0 | 254 | } |
michael@0 | 255 | |
michael@0 | 256 | template<typename A, typename B, typename C, typename D> |
michael@0 | 257 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 258 | inline uint32_t |
michael@0 | 259 | HashGeneric(A a, B b, C c, D d) |
michael@0 | 260 | { |
michael@0 | 261 | return AddToHash(0, a, b, c, d); |
michael@0 | 262 | } |
michael@0 | 263 | |
michael@0 | 264 | template<typename A, typename B, typename C, typename D, typename E> |
michael@0 | 265 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 266 | inline uint32_t |
michael@0 | 267 | HashGeneric(A a, B b, C c, D d, E e) |
michael@0 | 268 | { |
michael@0 | 269 | return AddToHash(0, a, b, c, d, e); |
michael@0 | 270 | } |
michael@0 | 271 | |
michael@0 | 272 | namespace detail { |
michael@0 | 273 | |
michael@0 | 274 | template<typename T> |
michael@0 | 275 | uint32_t |
michael@0 | 276 | HashUntilZero(const T* str) |
michael@0 | 277 | { |
michael@0 | 278 | uint32_t hash = 0; |
michael@0 | 279 | for (T c; (c = *str); str++) |
michael@0 | 280 | hash = AddToHash(hash, c); |
michael@0 | 281 | return hash; |
michael@0 | 282 | } |
michael@0 | 283 | |
michael@0 | 284 | template<typename T> |
michael@0 | 285 | uint32_t |
michael@0 | 286 | HashKnownLength(const T* str, size_t length) |
michael@0 | 287 | { |
michael@0 | 288 | uint32_t hash = 0; |
michael@0 | 289 | for (size_t i = 0; i < length; i++) |
michael@0 | 290 | hash = AddToHash(hash, str[i]); |
michael@0 | 291 | return hash; |
michael@0 | 292 | } |
michael@0 | 293 | |
michael@0 | 294 | } /* namespace detail */ |
michael@0 | 295 | |
michael@0 | 296 | /** |
michael@0 | 297 | * The HashString overloads below do just what you'd expect. |
michael@0 | 298 | * |
michael@0 | 299 | * If you have the string's length, you might as well call the overload which |
michael@0 | 300 | * includes the length. It may be marginally faster. |
michael@0 | 301 | */ |
michael@0 | 302 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 303 | inline uint32_t |
michael@0 | 304 | HashString(const char* str) |
michael@0 | 305 | { |
michael@0 | 306 | return detail::HashUntilZero(str); |
michael@0 | 307 | } |
michael@0 | 308 | |
michael@0 | 309 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 310 | inline uint32_t |
michael@0 | 311 | HashString(const char* str, size_t length) |
michael@0 | 312 | { |
michael@0 | 313 | return detail::HashKnownLength(str, length); |
michael@0 | 314 | } |
michael@0 | 315 | |
michael@0 | 316 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 317 | inline uint32_t |
michael@0 | 318 | HashString(const uint16_t* str) |
michael@0 | 319 | { |
michael@0 | 320 | return detail::HashUntilZero(str); |
michael@0 | 321 | } |
michael@0 | 322 | |
michael@0 | 323 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 324 | inline uint32_t |
michael@0 | 325 | HashString(const uint16_t* str, size_t length) |
michael@0 | 326 | { |
michael@0 | 327 | return detail::HashKnownLength(str, length); |
michael@0 | 328 | } |
michael@0 | 329 | |
michael@0 | 330 | #ifdef MOZ_CHAR16_IS_NOT_WCHAR |
michael@0 | 331 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 332 | inline uint32_t |
michael@0 | 333 | HashString(const char16_t* str) |
michael@0 | 334 | { |
michael@0 | 335 | return detail::HashUntilZero(str); |
michael@0 | 336 | } |
michael@0 | 337 | |
michael@0 | 338 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 339 | inline uint32_t |
michael@0 | 340 | HashString(const char16_t* str, size_t length) |
michael@0 | 341 | { |
michael@0 | 342 | return detail::HashKnownLength(str, length); |
michael@0 | 343 | } |
michael@0 | 344 | #endif |
michael@0 | 345 | |
michael@0 | 346 | /* |
michael@0 | 347 | * On Windows, wchar_t (char16_t) is not the same as uint16_t, even though it's |
michael@0 | 348 | * the same width! |
michael@0 | 349 | */ |
michael@0 | 350 | #ifdef WIN32 |
michael@0 | 351 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 352 | inline uint32_t |
michael@0 | 353 | HashString(const wchar_t* str) |
michael@0 | 354 | { |
michael@0 | 355 | return detail::HashUntilZero(str); |
michael@0 | 356 | } |
michael@0 | 357 | |
michael@0 | 358 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 359 | inline uint32_t |
michael@0 | 360 | HashString(const wchar_t* str, size_t length) |
michael@0 | 361 | { |
michael@0 | 362 | return detail::HashKnownLength(str, length); |
michael@0 | 363 | } |
michael@0 | 364 | #endif |
michael@0 | 365 | |
michael@0 | 366 | /** |
michael@0 | 367 | * Hash some number of bytes. |
michael@0 | 368 | * |
michael@0 | 369 | * This hash walks word-by-word, rather than byte-by-byte, so you won't get the |
michael@0 | 370 | * same result out of HashBytes as you would out of HashString. |
michael@0 | 371 | */ |
michael@0 | 372 | MOZ_WARN_UNUSED_RESULT |
michael@0 | 373 | extern MFBT_API uint32_t |
michael@0 | 374 | HashBytes(const void* bytes, size_t length); |
michael@0 | 375 | |
michael@0 | 376 | } /* namespace mozilla */ |
michael@0 | 377 | #endif /* __cplusplus */ |
michael@0 | 378 | |
michael@0 | 379 | #endif /* mozilla_HashFunctions_h */ |