1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/mfbt/HashFunctions.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,379 @@ 1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ 1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 + 1.10 +/* Utilities for hashing. */ 1.11 + 1.12 +/* 1.13 + * This file exports functions for hashing data down to a 32-bit value, 1.14 + * including: 1.15 + * 1.16 + * - HashString Hash a char* or uint16_t/wchar_t* of known or unknown 1.17 + * length. 1.18 + * 1.19 + * - HashBytes Hash a byte array of known length. 1.20 + * 1.21 + * - HashGeneric Hash one or more values. Currently, we support uint32_t, 1.22 + * types which can be implicitly cast to uint32_t, data 1.23 + * pointers, and function pointers. 1.24 + * 1.25 + * - AddToHash Add one or more values to the given hash. This supports the 1.26 + * same list of types as HashGeneric. 1.27 + * 1.28 + * 1.29 + * You can chain these functions together to hash complex objects. For example: 1.30 + * 1.31 + * class ComplexObject 1.32 + * { 1.33 + * char* str; 1.34 + * uint32_t uint1, uint2; 1.35 + * void (*callbackFn)(); 1.36 + * 1.37 + * public: 1.38 + * uint32_t hash() { 1.39 + * uint32_t hash = HashString(str); 1.40 + * hash = AddToHash(hash, uint1, uint2); 1.41 + * return AddToHash(hash, callbackFn); 1.42 + * } 1.43 + * }; 1.44 + * 1.45 + * If you want to hash an nsAString or nsACString, use the HashString functions 1.46 + * in nsHashKeys.h. 1.47 + */ 1.48 + 1.49 +#ifndef mozilla_HashFunctions_h 1.50 +#define mozilla_HashFunctions_h 1.51 + 1.52 +#include "mozilla/Assertions.h" 1.53 +#include "mozilla/Attributes.h" 1.54 +#include "mozilla/Char16.h" 1.55 +#include "mozilla/Types.h" 1.56 + 1.57 +#include <stdint.h> 1.58 + 1.59 +#ifdef __cplusplus 1.60 +namespace mozilla { 1.61 + 1.62 +/** 1.63 + * The golden ratio as a 32-bit fixed-point value. 1.64 + */ 1.65 +static const uint32_t GoldenRatioU32 = 0x9E3779B9U; 1.66 + 1.67 +inline uint32_t 1.68 +RotateBitsLeft32(uint32_t value, uint8_t bits) 1.69 +{ 1.70 + MOZ_ASSERT(bits < 32); 1.71 + return (value << bits) | (value >> (32 - bits)); 1.72 +} 1.73 + 1.74 +namespace detail { 1.75 + 1.76 +inline uint32_t 1.77 +AddU32ToHash(uint32_t hash, uint32_t value) 1.78 +{ 1.79 + /* 1.80 + * This is the meat of all our hash routines. This hash function is not 1.81 + * particularly sophisticated, but it seems to work well for our mostly 1.82 + * plain-text inputs. Implementation notes follow. 1.83 + * 1.84 + * Our use of the golden ratio here is arbitrary; we could pick almost any 1.85 + * number which: 1.86 + * 1.87 + * * is odd (because otherwise, all our hash values will be even) 1.88 + * 1.89 + * * has a reasonably-even mix of 1's and 0's (consider the extreme case 1.90 + * where we multiply by 0x3 or 0xeffffff -- this will not produce good 1.91 + * mixing across all bits of the hash). 1.92 + * 1.93 + * The rotation length of 5 is also arbitrary, although an odd number is again 1.94 + * preferable so our hash explores the whole universe of possible rotations. 1.95 + * 1.96 + * Finally, we multiply by the golden ratio *after* xor'ing, not before. 1.97 + * Otherwise, if |hash| is 0 (as it often is for the beginning of a message), 1.98 + * the expression 1.99 + * 1.100 + * (GoldenRatioU32 * RotateBitsLeft(hash, 5)) |xor| value 1.101 + * 1.102 + * evaluates to |value|. 1.103 + * 1.104 + * (Number-theoretic aside: Because any odd number |m| is relatively prime to 1.105 + * our modulus (2^32), the list 1.106 + * 1.107 + * [x * m (mod 2^32) for 0 <= x < 2^32] 1.108 + * 1.109 + * has no duplicate elements. This means that multiplying by |m| does not 1.110 + * cause us to skip any possible hash values. 1.111 + * 1.112 + * It's also nice if |m| has large-ish order mod 2^32 -- that is, if the 1.113 + * smallest k such that m^k == 1 (mod 2^32) is large -- so we can safely 1.114 + * multiply our hash value by |m| a few times without negating the 1.115 + * multiplicative effect. Our golden ratio constant has order 2^29, which is 1.116 + * more than enough for our purposes.) 1.117 + */ 1.118 + return GoldenRatioU32 * (RotateBitsLeft32(hash, 5) ^ value); 1.119 +} 1.120 + 1.121 +/** 1.122 + * AddUintptrToHash takes sizeof(uintptr_t) as a template parameter. 1.123 + */ 1.124 +template<size_t PtrSize> 1.125 +inline uint32_t 1.126 +AddUintptrToHash(uint32_t hash, uintptr_t value); 1.127 + 1.128 +template<> 1.129 +inline uint32_t 1.130 +AddUintptrToHash<4>(uint32_t hash, uintptr_t value) 1.131 +{ 1.132 + return AddU32ToHash(hash, static_cast<uint32_t>(value)); 1.133 +} 1.134 + 1.135 +template<> 1.136 +inline uint32_t 1.137 +AddUintptrToHash<8>(uint32_t hash, uintptr_t value) 1.138 +{ 1.139 + /* 1.140 + * The static cast to uint64_t below is necessary because this function 1.141 + * sometimes gets compiled on 32-bit platforms (yes, even though it's a 1.142 + * template and we never call this particular override in a 32-bit build). If 1.143 + * we do value >> 32 on a 32-bit machine, we're shifting a 32-bit uintptr_t 1.144 + * right 32 bits, and the compiler throws an error. 1.145 + */ 1.146 + uint32_t v1 = static_cast<uint32_t>(value); 1.147 + uint32_t v2 = static_cast<uint32_t>(static_cast<uint64_t>(value) >> 32); 1.148 + return AddU32ToHash(AddU32ToHash(hash, v1), v2); 1.149 +} 1.150 + 1.151 +} /* namespace detail */ 1.152 + 1.153 +/** 1.154 + * AddToHash takes a hash and some values and returns a new hash based on the 1.155 + * inputs. 1.156 + * 1.157 + * Currently, we support hashing uint32_t's, values which we can implicitly 1.158 + * convert to uint32_t, data pointers, and function pointers. 1.159 + */ 1.160 +template<typename A> 1.161 +MOZ_WARN_UNUSED_RESULT 1.162 +inline uint32_t 1.163 +AddToHash(uint32_t hash, A a) 1.164 +{ 1.165 + /* 1.166 + * Try to convert |A| to uint32_t implicitly. If this works, great. If not, 1.167 + * we'll error out. 1.168 + */ 1.169 + return detail::AddU32ToHash(hash, a); 1.170 +} 1.171 + 1.172 +template<typename A> 1.173 +MOZ_WARN_UNUSED_RESULT 1.174 +inline uint32_t 1.175 +AddToHash(uint32_t hash, A* a) 1.176 +{ 1.177 + /* 1.178 + * You might think this function should just take a void*. But then we'd only 1.179 + * catch data pointers and couldn't handle function pointers. 1.180 + */ 1.181 + 1.182 + static_assert(sizeof(a) == sizeof(uintptr_t), 1.183 + "Strange pointer!"); 1.184 + 1.185 + return detail::AddUintptrToHash<sizeof(uintptr_t)>(hash, uintptr_t(a)); 1.186 +} 1.187 + 1.188 +template<> 1.189 +MOZ_WARN_UNUSED_RESULT 1.190 +inline uint32_t 1.191 +AddToHash(uint32_t hash, uintptr_t a) 1.192 +{ 1.193 + return detail::AddUintptrToHash<sizeof(uintptr_t)>(hash, a); 1.194 +} 1.195 + 1.196 +template<typename A, typename B> 1.197 +MOZ_WARN_UNUSED_RESULT 1.198 +uint32_t 1.199 +AddToHash(uint32_t hash, A a, B b) 1.200 +{ 1.201 + return AddToHash(AddToHash(hash, a), b); 1.202 +} 1.203 + 1.204 +template<typename A, typename B, typename C> 1.205 +MOZ_WARN_UNUSED_RESULT 1.206 +uint32_t 1.207 +AddToHash(uint32_t hash, A a, B b, C c) 1.208 +{ 1.209 + return AddToHash(AddToHash(hash, a, b), c); 1.210 +} 1.211 + 1.212 +template<typename A, typename B, typename C, typename D> 1.213 +MOZ_WARN_UNUSED_RESULT 1.214 +uint32_t 1.215 +AddToHash(uint32_t hash, A a, B b, C c, D d) 1.216 +{ 1.217 + return AddToHash(AddToHash(hash, a, b, c), d); 1.218 +} 1.219 + 1.220 +template<typename A, typename B, typename C, typename D, typename E> 1.221 +MOZ_WARN_UNUSED_RESULT 1.222 +uint32_t 1.223 +AddToHash(uint32_t hash, A a, B b, C c, D d, E e) 1.224 +{ 1.225 + return AddToHash(AddToHash(hash, a, b, c, d), e); 1.226 +} 1.227 + 1.228 +/** 1.229 + * The HashGeneric class of functions let you hash one or more values. 1.230 + * 1.231 + * If you want to hash together two values x and y, calling HashGeneric(x, y) is 1.232 + * much better than calling AddToHash(x, y), because AddToHash(x, y) assumes 1.233 + * that x has already been hashed. 1.234 + */ 1.235 +template<typename A> 1.236 +MOZ_WARN_UNUSED_RESULT 1.237 +inline uint32_t 1.238 +HashGeneric(A a) 1.239 +{ 1.240 + return AddToHash(0, a); 1.241 +} 1.242 + 1.243 +template<typename A, typename B> 1.244 +MOZ_WARN_UNUSED_RESULT 1.245 +inline uint32_t 1.246 +HashGeneric(A a, B b) 1.247 +{ 1.248 + return AddToHash(0, a, b); 1.249 +} 1.250 + 1.251 +template<typename A, typename B, typename C> 1.252 +MOZ_WARN_UNUSED_RESULT 1.253 +inline uint32_t 1.254 +HashGeneric(A a, B b, C c) 1.255 +{ 1.256 + return AddToHash(0, a, b, c); 1.257 +} 1.258 + 1.259 +template<typename A, typename B, typename C, typename D> 1.260 +MOZ_WARN_UNUSED_RESULT 1.261 +inline uint32_t 1.262 +HashGeneric(A a, B b, C c, D d) 1.263 +{ 1.264 + return AddToHash(0, a, b, c, d); 1.265 +} 1.266 + 1.267 +template<typename A, typename B, typename C, typename D, typename E> 1.268 +MOZ_WARN_UNUSED_RESULT 1.269 +inline uint32_t 1.270 +HashGeneric(A a, B b, C c, D d, E e) 1.271 +{ 1.272 + return AddToHash(0, a, b, c, d, e); 1.273 +} 1.274 + 1.275 +namespace detail { 1.276 + 1.277 +template<typename T> 1.278 +uint32_t 1.279 +HashUntilZero(const T* str) 1.280 +{ 1.281 + uint32_t hash = 0; 1.282 + for (T c; (c = *str); str++) 1.283 + hash = AddToHash(hash, c); 1.284 + return hash; 1.285 +} 1.286 + 1.287 +template<typename T> 1.288 +uint32_t 1.289 +HashKnownLength(const T* str, size_t length) 1.290 +{ 1.291 + uint32_t hash = 0; 1.292 + for (size_t i = 0; i < length; i++) 1.293 + hash = AddToHash(hash, str[i]); 1.294 + return hash; 1.295 +} 1.296 + 1.297 +} /* namespace detail */ 1.298 + 1.299 +/** 1.300 + * The HashString overloads below do just what you'd expect. 1.301 + * 1.302 + * If you have the string's length, you might as well call the overload which 1.303 + * includes the length. It may be marginally faster. 1.304 + */ 1.305 +MOZ_WARN_UNUSED_RESULT 1.306 +inline uint32_t 1.307 +HashString(const char* str) 1.308 +{ 1.309 + return detail::HashUntilZero(str); 1.310 +} 1.311 + 1.312 +MOZ_WARN_UNUSED_RESULT 1.313 +inline uint32_t 1.314 +HashString(const char* str, size_t length) 1.315 +{ 1.316 + return detail::HashKnownLength(str, length); 1.317 +} 1.318 + 1.319 +MOZ_WARN_UNUSED_RESULT 1.320 +inline uint32_t 1.321 +HashString(const uint16_t* str) 1.322 +{ 1.323 + return detail::HashUntilZero(str); 1.324 +} 1.325 + 1.326 +MOZ_WARN_UNUSED_RESULT 1.327 +inline uint32_t 1.328 +HashString(const uint16_t* str, size_t length) 1.329 +{ 1.330 + return detail::HashKnownLength(str, length); 1.331 +} 1.332 + 1.333 +#ifdef MOZ_CHAR16_IS_NOT_WCHAR 1.334 +MOZ_WARN_UNUSED_RESULT 1.335 +inline uint32_t 1.336 +HashString(const char16_t* str) 1.337 +{ 1.338 + return detail::HashUntilZero(str); 1.339 +} 1.340 + 1.341 +MOZ_WARN_UNUSED_RESULT 1.342 +inline uint32_t 1.343 +HashString(const char16_t* str, size_t length) 1.344 +{ 1.345 + return detail::HashKnownLength(str, length); 1.346 +} 1.347 +#endif 1.348 + 1.349 +/* 1.350 + * On Windows, wchar_t (char16_t) is not the same as uint16_t, even though it's 1.351 + * the same width! 1.352 + */ 1.353 +#ifdef WIN32 1.354 +MOZ_WARN_UNUSED_RESULT 1.355 +inline uint32_t 1.356 +HashString(const wchar_t* str) 1.357 +{ 1.358 + return detail::HashUntilZero(str); 1.359 +} 1.360 + 1.361 +MOZ_WARN_UNUSED_RESULT 1.362 +inline uint32_t 1.363 +HashString(const wchar_t* str, size_t length) 1.364 +{ 1.365 + return detail::HashKnownLength(str, length); 1.366 +} 1.367 +#endif 1.368 + 1.369 +/** 1.370 + * Hash some number of bytes. 1.371 + * 1.372 + * This hash walks word-by-word, rather than byte-by-byte, so you won't get the 1.373 + * same result out of HashBytes as you would out of HashString. 1.374 + */ 1.375 +MOZ_WARN_UNUSED_RESULT 1.376 +extern MFBT_API uint32_t 1.377 +HashBytes(const void* bytes, size_t length); 1.378 + 1.379 +} /* namespace mozilla */ 1.380 +#endif /* __cplusplus */ 1.381 + 1.382 +#endif /* mozilla_HashFunctions_h */