1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/uhash.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,973 @@ 1.4 +/* 1.5 +****************************************************************************** 1.6 +* Copyright (C) 1997-2011, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +****************************************************************************** 1.9 +* Date Name Description 1.10 +* 03/22/00 aliu Adapted from original C++ ICU Hashtable. 1.11 +* 07/06/01 aliu Modified to support int32_t keys on 1.12 +* platforms with sizeof(void*) < 32. 1.13 +****************************************************************************** 1.14 +*/ 1.15 + 1.16 +#include "uhash.h" 1.17 +#include "unicode/ustring.h" 1.18 +#include "cstring.h" 1.19 +#include "cmemory.h" 1.20 +#include "uassert.h" 1.21 +#include "ustr_imp.h" 1.22 + 1.23 +/* This hashtable is implemented as a double hash. All elements are 1.24 + * stored in a single array with no secondary storage for collision 1.25 + * resolution (no linked list, etc.). When there is a hash collision 1.26 + * (when two unequal keys have the same hashcode) we resolve this by 1.27 + * using a secondary hash. The secondary hash is an increment 1.28 + * computed as a hash function (a different one) of the primary 1.29 + * hashcode. This increment is added to the initial hash value to 1.30 + * obtain further slots assigned to the same hash code. For this to 1.31 + * work, the length of the array and the increment must be relatively 1.32 + * prime. The easiest way to achieve this is to have the length of 1.33 + * the array be prime, and the increment be any value from 1.34 + * 1..length-1. 1.35 + * 1.36 + * Hashcodes are 32-bit integers. We make sure all hashcodes are 1.37 + * non-negative by masking off the top bit. This has two effects: (1) 1.38 + * modulo arithmetic is simplified. If we allowed negative hashcodes, 1.39 + * then when we computed hashcode % length, we could get a negative 1.40 + * result, which we would then have to adjust back into range. It's 1.41 + * simpler to just make hashcodes non-negative. (2) It makes it easy 1.42 + * to check for empty vs. occupied slots in the table. We just mark 1.43 + * empty or deleted slots with a negative hashcode. 1.44 + * 1.45 + * The central function is _uhash_find(). This function looks for a 1.46 + * slot matching the given key and hashcode. If one is found, it 1.47 + * returns a pointer to that slot. If the table is full, and no match 1.48 + * is found, it returns NULL -- in theory. This would make the code 1.49 + * more complicated, since all callers of _uhash_find() would then 1.50 + * have to check for a NULL result. To keep this from happening, we 1.51 + * don't allow the table to fill. When there is only one 1.52 + * empty/deleted slot left, uhash_put() will refuse to increase the 1.53 + * count, and fail. This simplifies the code. In practice, one will 1.54 + * seldom encounter this using default UHashtables. However, if a 1.55 + * hashtable is set to a U_FIXED resize policy, or if memory is 1.56 + * exhausted, then the table may fill. 1.57 + * 1.58 + * High and low water ratios control rehashing. They establish levels 1.59 + * of fullness (from 0 to 1) outside of which the data array is 1.60 + * reallocated and repopulated. Setting the low water ratio to zero 1.61 + * means the table will never shrink. Setting the high water ratio to 1.62 + * one means the table will never grow. The ratios should be 1.63 + * coordinated with the ratio between successive elements of the 1.64 + * PRIMES table, so that when the primeIndex is incremented or 1.65 + * decremented during rehashing, it brings the ratio of count / length 1.66 + * back into the desired range (between low and high water ratios). 1.67 + */ 1.68 + 1.69 +/******************************************************************** 1.70 + * PRIVATE Constants, Macros 1.71 + ********************************************************************/ 1.72 + 1.73 +/* This is a list of non-consecutive primes chosen such that 1.74 + * PRIMES[i+1] ~ 2*PRIMES[i]. (Currently, the ratio ranges from 1.81 1.75 + * to 2.18; the inverse ratio ranges from 0.459 to 0.552.) If this 1.76 + * ratio is changed, the low and high water ratios should also be 1.77 + * adjusted to suit. 1.78 + * 1.79 + * These prime numbers were also chosen so that they are the largest 1.80 + * prime number while being less than a power of two. 1.81 + */ 1.82 +static const int32_t PRIMES[] = { 1.83 + 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749, 1.84 + 65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593, 1.85 + 16777213, 33554393, 67108859, 134217689, 268435399, 536870909, 1.86 + 1073741789, 2147483647 /*, 4294967291 */ 1.87 +}; 1.88 + 1.89 +#define PRIMES_LENGTH (sizeof(PRIMES) / sizeof(PRIMES[0])) 1.90 +#define DEFAULT_PRIME_INDEX 3 1.91 + 1.92 +/* These ratios are tuned to the PRIMES array such that a resize 1.93 + * places the table back into the zone of non-resizing. That is, 1.94 + * after a call to _uhash_rehash(), a subsequent call to 1.95 + * _uhash_rehash() should do nothing (should not churn). This is only 1.96 + * a potential problem with U_GROW_AND_SHRINK. 1.97 + */ 1.98 +static const float RESIZE_POLICY_RATIO_TABLE[6] = { 1.99 + /* low, high water ratio */ 1.100 + 0.0F, 0.5F, /* U_GROW: Grow on demand, do not shrink */ 1.101 + 0.1F, 0.5F, /* U_GROW_AND_SHRINK: Grow and shrink on demand */ 1.102 + 0.0F, 1.0F /* U_FIXED: Never change size */ 1.103 +}; 1.104 + 1.105 +/* 1.106 + Invariants for hashcode values: 1.107 + 1.108 + * DELETED < 0 1.109 + * EMPTY < 0 1.110 + * Real hashes >= 0 1.111 + 1.112 + Hashcodes may not start out this way, but internally they are 1.113 + adjusted so that they are always positive. We assume 32-bit 1.114 + hashcodes; adjust these constants for other hashcode sizes. 1.115 +*/ 1.116 +#define HASH_DELETED ((int32_t) 0x80000000) 1.117 +#define HASH_EMPTY ((int32_t) HASH_DELETED + 1) 1.118 + 1.119 +#define IS_EMPTY_OR_DELETED(x) ((x) < 0) 1.120 + 1.121 +/* This macro expects a UHashTok.pointer as its keypointer and 1.122 + valuepointer parameters */ 1.123 +#define HASH_DELETE_KEY_VALUE(hash, keypointer, valuepointer) \ 1.124 + if (hash->keyDeleter != NULL && keypointer != NULL) { \ 1.125 + (*hash->keyDeleter)(keypointer); \ 1.126 + } \ 1.127 + if (hash->valueDeleter != NULL && valuepointer != NULL) { \ 1.128 + (*hash->valueDeleter)(valuepointer); \ 1.129 + } 1.130 + 1.131 +/* 1.132 + * Constants for hinting whether a key or value is an integer 1.133 + * or a pointer. If a hint bit is zero, then the associated 1.134 + * token is assumed to be an integer. 1.135 + */ 1.136 +#define HINT_KEY_POINTER (1) 1.137 +#define HINT_VALUE_POINTER (2) 1.138 + 1.139 +/******************************************************************** 1.140 + * PRIVATE Implementation 1.141 + ********************************************************************/ 1.142 + 1.143 +static UHashTok 1.144 +_uhash_setElement(UHashtable *hash, UHashElement* e, 1.145 + int32_t hashcode, 1.146 + UHashTok key, UHashTok value, int8_t hint) { 1.147 + 1.148 + UHashTok oldValue = e->value; 1.149 + if (hash->keyDeleter != NULL && e->key.pointer != NULL && 1.150 + e->key.pointer != key.pointer) { /* Avoid double deletion */ 1.151 + (*hash->keyDeleter)(e->key.pointer); 1.152 + } 1.153 + if (hash->valueDeleter != NULL) { 1.154 + if (oldValue.pointer != NULL && 1.155 + oldValue.pointer != value.pointer) { /* Avoid double deletion */ 1.156 + (*hash->valueDeleter)(oldValue.pointer); 1.157 + } 1.158 + oldValue.pointer = NULL; 1.159 + } 1.160 + /* Compilers should copy the UHashTok union correctly, but even if 1.161 + * they do, memory heap tools (e.g. BoundsChecker) can get 1.162 + * confused when a pointer is cloaked in a union and then copied. 1.163 + * TO ALLEVIATE THIS, we use hints (based on what API the user is 1.164 + * calling) to copy pointers when we know the user thinks 1.165 + * something is a pointer. */ 1.166 + if (hint & HINT_KEY_POINTER) { 1.167 + e->key.pointer = key.pointer; 1.168 + } else { 1.169 + e->key = key; 1.170 + } 1.171 + if (hint & HINT_VALUE_POINTER) { 1.172 + e->value.pointer = value.pointer; 1.173 + } else { 1.174 + e->value = value; 1.175 + } 1.176 + e->hashcode = hashcode; 1.177 + return oldValue; 1.178 +} 1.179 + 1.180 +/** 1.181 + * Assumes that the given element is not empty or deleted. 1.182 + */ 1.183 +static UHashTok 1.184 +_uhash_internalRemoveElement(UHashtable *hash, UHashElement* e) { 1.185 + UHashTok empty; 1.186 + U_ASSERT(!IS_EMPTY_OR_DELETED(e->hashcode)); 1.187 + --hash->count; 1.188 + empty.pointer = NULL; empty.integer = 0; 1.189 + return _uhash_setElement(hash, e, HASH_DELETED, empty, empty, 0); 1.190 +} 1.191 + 1.192 +static void 1.193 +_uhash_internalSetResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) { 1.194 + U_ASSERT(hash != NULL); 1.195 + U_ASSERT(((int32_t)policy) >= 0); 1.196 + U_ASSERT(((int32_t)policy) < 3); 1.197 + hash->lowWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2]; 1.198 + hash->highWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2 + 1]; 1.199 +} 1.200 + 1.201 +/** 1.202 + * Allocate internal data array of a size determined by the given 1.203 + * prime index. If the index is out of range it is pinned into range. 1.204 + * If the allocation fails the status is set to 1.205 + * U_MEMORY_ALLOCATION_ERROR and all array storage is freed. In 1.206 + * either case the previous array pointer is overwritten. 1.207 + * 1.208 + * Caller must ensure primeIndex is in range 0..PRIME_LENGTH-1. 1.209 + */ 1.210 +static void 1.211 +_uhash_allocate(UHashtable *hash, 1.212 + int32_t primeIndex, 1.213 + UErrorCode *status) { 1.214 + 1.215 + UHashElement *p, *limit; 1.216 + UHashTok emptytok; 1.217 + 1.218 + if (U_FAILURE(*status)) return; 1.219 + 1.220 + U_ASSERT(primeIndex >= 0 && primeIndex < PRIMES_LENGTH); 1.221 + 1.222 + hash->primeIndex = primeIndex; 1.223 + hash->length = PRIMES[primeIndex]; 1.224 + 1.225 + p = hash->elements = (UHashElement*) 1.226 + uprv_malloc(sizeof(UHashElement) * hash->length); 1.227 + 1.228 + if (hash->elements == NULL) { 1.229 + *status = U_MEMORY_ALLOCATION_ERROR; 1.230 + return; 1.231 + } 1.232 + 1.233 + emptytok.pointer = NULL; /* Only one of these two is needed */ 1.234 + emptytok.integer = 0; /* but we don't know which one. */ 1.235 + 1.236 + limit = p + hash->length; 1.237 + while (p < limit) { 1.238 + p->key = emptytok; 1.239 + p->value = emptytok; 1.240 + p->hashcode = HASH_EMPTY; 1.241 + ++p; 1.242 + } 1.243 + 1.244 + hash->count = 0; 1.245 + hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio); 1.246 + hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio); 1.247 +} 1.248 + 1.249 +static UHashtable* 1.250 +_uhash_init(UHashtable *result, 1.251 + UHashFunction *keyHash, 1.252 + UKeyComparator *keyComp, 1.253 + UValueComparator *valueComp, 1.254 + int32_t primeIndex, 1.255 + UErrorCode *status) 1.256 +{ 1.257 + if (U_FAILURE(*status)) return NULL; 1.258 + U_ASSERT(keyHash != NULL); 1.259 + U_ASSERT(keyComp != NULL); 1.260 + 1.261 + result->keyHasher = keyHash; 1.262 + result->keyComparator = keyComp; 1.263 + result->valueComparator = valueComp; 1.264 + result->keyDeleter = NULL; 1.265 + result->valueDeleter = NULL; 1.266 + result->allocated = FALSE; 1.267 + _uhash_internalSetResizePolicy(result, U_GROW); 1.268 + 1.269 + _uhash_allocate(result, primeIndex, status); 1.270 + 1.271 + if (U_FAILURE(*status)) { 1.272 + return NULL; 1.273 + } 1.274 + 1.275 + return result; 1.276 +} 1.277 + 1.278 +static UHashtable* 1.279 +_uhash_create(UHashFunction *keyHash, 1.280 + UKeyComparator *keyComp, 1.281 + UValueComparator *valueComp, 1.282 + int32_t primeIndex, 1.283 + UErrorCode *status) { 1.284 + UHashtable *result; 1.285 + 1.286 + if (U_FAILURE(*status)) return NULL; 1.287 + 1.288 + result = (UHashtable*) uprv_malloc(sizeof(UHashtable)); 1.289 + if (result == NULL) { 1.290 + *status = U_MEMORY_ALLOCATION_ERROR; 1.291 + return NULL; 1.292 + } 1.293 + 1.294 + _uhash_init(result, keyHash, keyComp, valueComp, primeIndex, status); 1.295 + result->allocated = TRUE; 1.296 + 1.297 + if (U_FAILURE(*status)) { 1.298 + uprv_free(result); 1.299 + return NULL; 1.300 + } 1.301 + 1.302 + return result; 1.303 +} 1.304 + 1.305 +/** 1.306 + * Look for a key in the table, or if no such key exists, the first 1.307 + * empty slot matching the given hashcode. Keys are compared using 1.308 + * the keyComparator function. 1.309 + * 1.310 + * First find the start position, which is the hashcode modulo 1.311 + * the length. Test it to see if it is: 1.312 + * 1.313 + * a. identical: First check the hash values for a quick check, 1.314 + * then compare keys for equality using keyComparator. 1.315 + * b. deleted 1.316 + * c. empty 1.317 + * 1.318 + * Stop if it is identical or empty, otherwise continue by adding a 1.319 + * "jump" value (moduloing by the length again to keep it within 1.320 + * range) and retesting. For efficiency, there need enough empty 1.321 + * values so that the searchs stop within a reasonable amount of time. 1.322 + * This can be changed by changing the high/low water marks. 1.323 + * 1.324 + * In theory, this function can return NULL, if it is full (no empty 1.325 + * or deleted slots) and if no matching key is found. In practice, we 1.326 + * prevent this elsewhere (in uhash_put) by making sure the last slot 1.327 + * in the table is never filled. 1.328 + * 1.329 + * The size of the table should be prime for this algorithm to work; 1.330 + * otherwise we are not guaranteed that the jump value (the secondary 1.331 + * hash) is relatively prime to the table length. 1.332 + */ 1.333 +static UHashElement* 1.334 +_uhash_find(const UHashtable *hash, UHashTok key, 1.335 + int32_t hashcode) { 1.336 + 1.337 + int32_t firstDeleted = -1; /* assume invalid index */ 1.338 + int32_t theIndex, startIndex; 1.339 + int32_t jump = 0; /* lazy evaluate */ 1.340 + int32_t tableHash; 1.341 + UHashElement *elements = hash->elements; 1.342 + 1.343 + hashcode &= 0x7FFFFFFF; /* must be positive */ 1.344 + startIndex = theIndex = (hashcode ^ 0x4000000) % hash->length; 1.345 + 1.346 + do { 1.347 + tableHash = elements[theIndex].hashcode; 1.348 + if (tableHash == hashcode) { /* quick check */ 1.349 + if ((*hash->keyComparator)(key, elements[theIndex].key)) { 1.350 + return &(elements[theIndex]); 1.351 + } 1.352 + } else if (!IS_EMPTY_OR_DELETED(tableHash)) { 1.353 + /* We have hit a slot which contains a key-value pair, 1.354 + * but for which the hash code does not match. Keep 1.355 + * looking. 1.356 + */ 1.357 + } else if (tableHash == HASH_EMPTY) { /* empty, end o' the line */ 1.358 + break; 1.359 + } else if (firstDeleted < 0) { /* remember first deleted */ 1.360 + firstDeleted = theIndex; 1.361 + } 1.362 + if (jump == 0) { /* lazy compute jump */ 1.363 + /* The jump value must be relatively prime to the table 1.364 + * length. As long as the length is prime, then any value 1.365 + * 1..length-1 will be relatively prime to it. 1.366 + */ 1.367 + jump = (hashcode % (hash->length - 1)) + 1; 1.368 + } 1.369 + theIndex = (theIndex + jump) % hash->length; 1.370 + } while (theIndex != startIndex); 1.371 + 1.372 + if (firstDeleted >= 0) { 1.373 + theIndex = firstDeleted; /* reset if had deleted slot */ 1.374 + } else if (tableHash != HASH_EMPTY) { 1.375 + /* We get to this point if the hashtable is full (no empty or 1.376 + * deleted slots), and we've failed to find a match. THIS 1.377 + * WILL NEVER HAPPEN as long as uhash_put() makes sure that 1.378 + * count is always < length. 1.379 + */ 1.380 + U_ASSERT(FALSE); 1.381 + return NULL; /* Never happens if uhash_put() behaves */ 1.382 + } 1.383 + return &(elements[theIndex]); 1.384 +} 1.385 + 1.386 +/** 1.387 + * Attempt to grow or shrink the data arrays in order to make the 1.388 + * count fit between the high and low water marks. hash_put() and 1.389 + * hash_remove() call this method when the count exceeds the high or 1.390 + * low water marks. This method may do nothing, if memory allocation 1.391 + * fails, or if the count is already in range, or if the length is 1.392 + * already at the low or high limit. In any case, upon return the 1.393 + * arrays will be valid. 1.394 + */ 1.395 +static void 1.396 +_uhash_rehash(UHashtable *hash, UErrorCode *status) { 1.397 + 1.398 + UHashElement *old = hash->elements; 1.399 + int32_t oldLength = hash->length; 1.400 + int32_t newPrimeIndex = hash->primeIndex; 1.401 + int32_t i; 1.402 + 1.403 + if (hash->count > hash->highWaterMark) { 1.404 + if (++newPrimeIndex >= PRIMES_LENGTH) { 1.405 + return; 1.406 + } 1.407 + } else if (hash->count < hash->lowWaterMark) { 1.408 + if (--newPrimeIndex < 0) { 1.409 + return; 1.410 + } 1.411 + } else { 1.412 + return; 1.413 + } 1.414 + 1.415 + _uhash_allocate(hash, newPrimeIndex, status); 1.416 + 1.417 + if (U_FAILURE(*status)) { 1.418 + hash->elements = old; 1.419 + hash->length = oldLength; 1.420 + return; 1.421 + } 1.422 + 1.423 + for (i = oldLength - 1; i >= 0; --i) { 1.424 + if (!IS_EMPTY_OR_DELETED(old[i].hashcode)) { 1.425 + UHashElement *e = _uhash_find(hash, old[i].key, old[i].hashcode); 1.426 + U_ASSERT(e != NULL); 1.427 + U_ASSERT(e->hashcode == HASH_EMPTY); 1.428 + e->key = old[i].key; 1.429 + e->value = old[i].value; 1.430 + e->hashcode = old[i].hashcode; 1.431 + ++hash->count; 1.432 + } 1.433 + } 1.434 + 1.435 + uprv_free(old); 1.436 +} 1.437 + 1.438 +static UHashTok 1.439 +_uhash_remove(UHashtable *hash, 1.440 + UHashTok key) { 1.441 + /* First find the position of the key in the table. If the object 1.442 + * has not been removed already, remove it. If the user wanted 1.443 + * keys deleted, then delete it also. We have to put a special 1.444 + * hashcode in that position that means that something has been 1.445 + * deleted, since when we do a find, we have to continue PAST any 1.446 + * deleted values. 1.447 + */ 1.448 + UHashTok result; 1.449 + UHashElement* e = _uhash_find(hash, key, hash->keyHasher(key)); 1.450 + U_ASSERT(e != NULL); 1.451 + result.pointer = NULL; 1.452 + result.integer = 0; 1.453 + if (!IS_EMPTY_OR_DELETED(e->hashcode)) { 1.454 + result = _uhash_internalRemoveElement(hash, e); 1.455 + if (hash->count < hash->lowWaterMark) { 1.456 + UErrorCode status = U_ZERO_ERROR; 1.457 + _uhash_rehash(hash, &status); 1.458 + } 1.459 + } 1.460 + return result; 1.461 +} 1.462 + 1.463 +static UHashTok 1.464 +_uhash_put(UHashtable *hash, 1.465 + UHashTok key, 1.466 + UHashTok value, 1.467 + int8_t hint, 1.468 + UErrorCode *status) { 1.469 + 1.470 + /* Put finds the position in the table for the new value. If the 1.471 + * key is already in the table, it is deleted, if there is a 1.472 + * non-NULL keyDeleter. Then the key, the hash and the value are 1.473 + * all put at the position in their respective arrays. 1.474 + */ 1.475 + int32_t hashcode; 1.476 + UHashElement* e; 1.477 + UHashTok emptytok; 1.478 + 1.479 + if (U_FAILURE(*status)) { 1.480 + goto err; 1.481 + } 1.482 + U_ASSERT(hash != NULL); 1.483 + /* Cannot always check pointer here or iSeries sees NULL every time. */ 1.484 + if ((hint & HINT_VALUE_POINTER) && value.pointer == NULL) { 1.485 + /* Disallow storage of NULL values, since NULL is returned by 1.486 + * get() to indicate an absent key. Storing NULL == removing. 1.487 + */ 1.488 + return _uhash_remove(hash, key); 1.489 + } 1.490 + if (hash->count > hash->highWaterMark) { 1.491 + _uhash_rehash(hash, status); 1.492 + if (U_FAILURE(*status)) { 1.493 + goto err; 1.494 + } 1.495 + } 1.496 + 1.497 + hashcode = (*hash->keyHasher)(key); 1.498 + e = _uhash_find(hash, key, hashcode); 1.499 + U_ASSERT(e != NULL); 1.500 + 1.501 + if (IS_EMPTY_OR_DELETED(e->hashcode)) { 1.502 + /* Important: We must never actually fill the table up. If we 1.503 + * do so, then _uhash_find() will return NULL, and we'll have 1.504 + * to check for NULL after every call to _uhash_find(). To 1.505 + * avoid this we make sure there is always at least one empty 1.506 + * or deleted slot in the table. This only is a problem if we 1.507 + * are out of memory and rehash isn't working. 1.508 + */ 1.509 + ++hash->count; 1.510 + if (hash->count == hash->length) { 1.511 + /* Don't allow count to reach length */ 1.512 + --hash->count; 1.513 + *status = U_MEMORY_ALLOCATION_ERROR; 1.514 + goto err; 1.515 + } 1.516 + } 1.517 + 1.518 + /* We must in all cases handle storage properly. If there was an 1.519 + * old key, then it must be deleted (if the deleter != NULL). 1.520 + * Make hashcodes stored in table positive. 1.521 + */ 1.522 + return _uhash_setElement(hash, e, hashcode & 0x7FFFFFFF, key, value, hint); 1.523 + 1.524 + err: 1.525 + /* If the deleters are non-NULL, this method adopts its key and/or 1.526 + * value arguments, and we must be sure to delete the key and/or 1.527 + * value in all cases, even upon failure. 1.528 + */ 1.529 + HASH_DELETE_KEY_VALUE(hash, key.pointer, value.pointer); 1.530 + emptytok.pointer = NULL; emptytok.integer = 0; 1.531 + return emptytok; 1.532 +} 1.533 + 1.534 + 1.535 +/******************************************************************** 1.536 + * PUBLIC API 1.537 + ********************************************************************/ 1.538 + 1.539 +U_CAPI UHashtable* U_EXPORT2 1.540 +uhash_open(UHashFunction *keyHash, 1.541 + UKeyComparator *keyComp, 1.542 + UValueComparator *valueComp, 1.543 + UErrorCode *status) { 1.544 + 1.545 + return _uhash_create(keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status); 1.546 +} 1.547 + 1.548 +U_CAPI UHashtable* U_EXPORT2 1.549 +uhash_openSize(UHashFunction *keyHash, 1.550 + UKeyComparator *keyComp, 1.551 + UValueComparator *valueComp, 1.552 + int32_t size, 1.553 + UErrorCode *status) { 1.554 + 1.555 + /* Find the smallest index i for which PRIMES[i] >= size. */ 1.556 + int32_t i = 0; 1.557 + while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) { 1.558 + ++i; 1.559 + } 1.560 + 1.561 + return _uhash_create(keyHash, keyComp, valueComp, i, status); 1.562 +} 1.563 + 1.564 +U_CAPI UHashtable* U_EXPORT2 1.565 +uhash_init(UHashtable *fillinResult, 1.566 + UHashFunction *keyHash, 1.567 + UKeyComparator *keyComp, 1.568 + UValueComparator *valueComp, 1.569 + UErrorCode *status) { 1.570 + 1.571 + return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status); 1.572 +} 1.573 + 1.574 +U_CAPI void U_EXPORT2 1.575 +uhash_close(UHashtable *hash) { 1.576 + if (hash == NULL) { 1.577 + return; 1.578 + } 1.579 + if (hash->elements != NULL) { 1.580 + if (hash->keyDeleter != NULL || hash->valueDeleter != NULL) { 1.581 + int32_t pos=-1; 1.582 + UHashElement *e; 1.583 + while ((e = (UHashElement*) uhash_nextElement(hash, &pos)) != NULL) { 1.584 + HASH_DELETE_KEY_VALUE(hash, e->key.pointer, e->value.pointer); 1.585 + } 1.586 + } 1.587 + uprv_free(hash->elements); 1.588 + hash->elements = NULL; 1.589 + } 1.590 + if (hash->allocated) { 1.591 + uprv_free(hash); 1.592 + } 1.593 +} 1.594 + 1.595 +U_CAPI UHashFunction *U_EXPORT2 1.596 +uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn) { 1.597 + UHashFunction *result = hash->keyHasher; 1.598 + hash->keyHasher = fn; 1.599 + return result; 1.600 +} 1.601 + 1.602 +U_CAPI UKeyComparator *U_EXPORT2 1.603 +uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) { 1.604 + UKeyComparator *result = hash->keyComparator; 1.605 + hash->keyComparator = fn; 1.606 + return result; 1.607 +} 1.608 +U_CAPI UValueComparator *U_EXPORT2 1.609 +uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){ 1.610 + UValueComparator *result = hash->valueComparator; 1.611 + hash->valueComparator = fn; 1.612 + return result; 1.613 +} 1.614 + 1.615 +U_CAPI UObjectDeleter *U_EXPORT2 1.616 +uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn) { 1.617 + UObjectDeleter *result = hash->keyDeleter; 1.618 + hash->keyDeleter = fn; 1.619 + return result; 1.620 +} 1.621 + 1.622 +U_CAPI UObjectDeleter *U_EXPORT2 1.623 +uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn) { 1.624 + UObjectDeleter *result = hash->valueDeleter; 1.625 + hash->valueDeleter = fn; 1.626 + return result; 1.627 +} 1.628 + 1.629 +U_CAPI void U_EXPORT2 1.630 +uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) { 1.631 + UErrorCode status = U_ZERO_ERROR; 1.632 + _uhash_internalSetResizePolicy(hash, policy); 1.633 + hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio); 1.634 + hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio); 1.635 + _uhash_rehash(hash, &status); 1.636 +} 1.637 + 1.638 +U_CAPI int32_t U_EXPORT2 1.639 +uhash_count(const UHashtable *hash) { 1.640 + return hash->count; 1.641 +} 1.642 + 1.643 +U_CAPI void* U_EXPORT2 1.644 +uhash_get(const UHashtable *hash, 1.645 + const void* key) { 1.646 + UHashTok keyholder; 1.647 + keyholder.pointer = (void*) key; 1.648 + return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer; 1.649 +} 1.650 + 1.651 +U_CAPI void* U_EXPORT2 1.652 +uhash_iget(const UHashtable *hash, 1.653 + int32_t key) { 1.654 + UHashTok keyholder; 1.655 + keyholder.integer = key; 1.656 + return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer; 1.657 +} 1.658 + 1.659 +U_CAPI int32_t U_EXPORT2 1.660 +uhash_geti(const UHashtable *hash, 1.661 + const void* key) { 1.662 + UHashTok keyholder; 1.663 + keyholder.pointer = (void*) key; 1.664 + return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer; 1.665 +} 1.666 + 1.667 +U_CAPI int32_t U_EXPORT2 1.668 +uhash_igeti(const UHashtable *hash, 1.669 + int32_t key) { 1.670 + UHashTok keyholder; 1.671 + keyholder.integer = key; 1.672 + return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer; 1.673 +} 1.674 + 1.675 +U_CAPI void* U_EXPORT2 1.676 +uhash_put(UHashtable *hash, 1.677 + void* key, 1.678 + void* value, 1.679 + UErrorCode *status) { 1.680 + UHashTok keyholder, valueholder; 1.681 + keyholder.pointer = key; 1.682 + valueholder.pointer = value; 1.683 + return _uhash_put(hash, keyholder, valueholder, 1.684 + HINT_KEY_POINTER | HINT_VALUE_POINTER, 1.685 + status).pointer; 1.686 +} 1.687 + 1.688 +U_CAPI void* U_EXPORT2 1.689 +uhash_iput(UHashtable *hash, 1.690 + int32_t key, 1.691 + void* value, 1.692 + UErrorCode *status) { 1.693 + UHashTok keyholder, valueholder; 1.694 + keyholder.integer = key; 1.695 + valueholder.pointer = value; 1.696 + return _uhash_put(hash, keyholder, valueholder, 1.697 + HINT_VALUE_POINTER, 1.698 + status).pointer; 1.699 +} 1.700 + 1.701 +U_CAPI int32_t U_EXPORT2 1.702 +uhash_puti(UHashtable *hash, 1.703 + void* key, 1.704 + int32_t value, 1.705 + UErrorCode *status) { 1.706 + UHashTok keyholder, valueholder; 1.707 + keyholder.pointer = key; 1.708 + valueholder.integer = value; 1.709 + return _uhash_put(hash, keyholder, valueholder, 1.710 + HINT_KEY_POINTER, 1.711 + status).integer; 1.712 +} 1.713 + 1.714 + 1.715 +U_CAPI int32_t U_EXPORT2 1.716 +uhash_iputi(UHashtable *hash, 1.717 + int32_t key, 1.718 + int32_t value, 1.719 + UErrorCode *status) { 1.720 + UHashTok keyholder, valueholder; 1.721 + keyholder.integer = key; 1.722 + valueholder.integer = value; 1.723 + return _uhash_put(hash, keyholder, valueholder, 1.724 + 0, /* neither is a ptr */ 1.725 + status).integer; 1.726 +} 1.727 + 1.728 +U_CAPI void* U_EXPORT2 1.729 +uhash_remove(UHashtable *hash, 1.730 + const void* key) { 1.731 + UHashTok keyholder; 1.732 + keyholder.pointer = (void*) key; 1.733 + return _uhash_remove(hash, keyholder).pointer; 1.734 +} 1.735 + 1.736 +U_CAPI void* U_EXPORT2 1.737 +uhash_iremove(UHashtable *hash, 1.738 + int32_t key) { 1.739 + UHashTok keyholder; 1.740 + keyholder.integer = key; 1.741 + return _uhash_remove(hash, keyholder).pointer; 1.742 +} 1.743 + 1.744 +U_CAPI int32_t U_EXPORT2 1.745 +uhash_removei(UHashtable *hash, 1.746 + const void* key) { 1.747 + UHashTok keyholder; 1.748 + keyholder.pointer = (void*) key; 1.749 + return _uhash_remove(hash, keyholder).integer; 1.750 +} 1.751 + 1.752 +U_CAPI int32_t U_EXPORT2 1.753 +uhash_iremovei(UHashtable *hash, 1.754 + int32_t key) { 1.755 + UHashTok keyholder; 1.756 + keyholder.integer = key; 1.757 + return _uhash_remove(hash, keyholder).integer; 1.758 +} 1.759 + 1.760 +U_CAPI void U_EXPORT2 1.761 +uhash_removeAll(UHashtable *hash) { 1.762 + int32_t pos = -1; 1.763 + const UHashElement *e; 1.764 + U_ASSERT(hash != NULL); 1.765 + if (hash->count != 0) { 1.766 + while ((e = uhash_nextElement(hash, &pos)) != NULL) { 1.767 + uhash_removeElement(hash, e); 1.768 + } 1.769 + } 1.770 + U_ASSERT(hash->count == 0); 1.771 +} 1.772 + 1.773 +U_CAPI const UHashElement* U_EXPORT2 1.774 +uhash_find(const UHashtable *hash, const void* key) { 1.775 + UHashTok keyholder; 1.776 + const UHashElement *e; 1.777 + keyholder.pointer = (void*) key; 1.778 + e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder)); 1.779 + return IS_EMPTY_OR_DELETED(e->hashcode) ? NULL : e; 1.780 +} 1.781 + 1.782 +U_CAPI const UHashElement* U_EXPORT2 1.783 +uhash_nextElement(const UHashtable *hash, int32_t *pos) { 1.784 + /* Walk through the array until we find an element that is not 1.785 + * EMPTY and not DELETED. 1.786 + */ 1.787 + int32_t i; 1.788 + U_ASSERT(hash != NULL); 1.789 + for (i = *pos + 1; i < hash->length; ++i) { 1.790 + if (!IS_EMPTY_OR_DELETED(hash->elements[i].hashcode)) { 1.791 + *pos = i; 1.792 + return &(hash->elements[i]); 1.793 + } 1.794 + } 1.795 + 1.796 + /* No more elements */ 1.797 + return NULL; 1.798 +} 1.799 + 1.800 +U_CAPI void* U_EXPORT2 1.801 +uhash_removeElement(UHashtable *hash, const UHashElement* e) { 1.802 + U_ASSERT(hash != NULL); 1.803 + U_ASSERT(e != NULL); 1.804 + if (!IS_EMPTY_OR_DELETED(e->hashcode)) { 1.805 + UHashElement *nce = (UHashElement *)e; 1.806 + return _uhash_internalRemoveElement(hash, nce).pointer; 1.807 + } 1.808 + return NULL; 1.809 +} 1.810 + 1.811 +/******************************************************************** 1.812 + * UHashTok convenience 1.813 + ********************************************************************/ 1.814 + 1.815 +/** 1.816 + * Return a UHashTok for an integer. 1.817 + */ 1.818 +/*U_CAPI UHashTok U_EXPORT2 1.819 +uhash_toki(int32_t i) { 1.820 + UHashTok tok; 1.821 + tok.integer = i; 1.822 + return tok; 1.823 +}*/ 1.824 + 1.825 +/** 1.826 + * Return a UHashTok for a pointer. 1.827 + */ 1.828 +/*U_CAPI UHashTok U_EXPORT2 1.829 +uhash_tokp(void* p) { 1.830 + UHashTok tok; 1.831 + tok.pointer = p; 1.832 + return tok; 1.833 +}*/ 1.834 + 1.835 +/******************************************************************** 1.836 + * PUBLIC Key Hash Functions 1.837 + ********************************************************************/ 1.838 + 1.839 +U_CAPI int32_t U_EXPORT2 1.840 +uhash_hashUChars(const UHashTok key) { 1.841 + const UChar *s = (const UChar *)key.pointer; 1.842 + return s == NULL ? 0 : ustr_hashUCharsN(s, u_strlen(s)); 1.843 +} 1.844 + 1.845 +U_CAPI int32_t U_EXPORT2 1.846 +uhash_hashChars(const UHashTok key) { 1.847 + const char *s = (const char *)key.pointer; 1.848 + return s == NULL ? 0 : ustr_hashCharsN(s, uprv_strlen(s)); 1.849 +} 1.850 + 1.851 +U_CAPI int32_t U_EXPORT2 1.852 +uhash_hashIChars(const UHashTok key) { 1.853 + const char *s = (const char *)key.pointer; 1.854 + return s == NULL ? 0 : ustr_hashICharsN(s, uprv_strlen(s)); 1.855 +} 1.856 + 1.857 +U_CAPI UBool U_EXPORT2 1.858 +uhash_equals(const UHashtable* hash1, const UHashtable* hash2){ 1.859 + int32_t count1, count2, pos, i; 1.860 + 1.861 + if(hash1==hash2){ 1.862 + return TRUE; 1.863 + } 1.864 + 1.865 + /* 1.866 + * Make sure that we are comparing 2 valid hashes of the same type 1.867 + * with valid comparison functions. 1.868 + * Without valid comparison functions, a binary comparison 1.869 + * of the hash values will yield random results on machines 1.870 + * with 64-bit pointers and 32-bit integer hashes. 1.871 + * A valueComparator is normally optional. 1.872 + */ 1.873 + if (hash1==NULL || hash2==NULL || 1.874 + hash1->keyComparator != hash2->keyComparator || 1.875 + hash1->valueComparator != hash2->valueComparator || 1.876 + hash1->valueComparator == NULL) 1.877 + { 1.878 + /* 1.879 + Normally we would return an error here about incompatible hash tables, 1.880 + but we return FALSE instead. 1.881 + */ 1.882 + return FALSE; 1.883 + } 1.884 + 1.885 + count1 = uhash_count(hash1); 1.886 + count2 = uhash_count(hash2); 1.887 + if(count1!=count2){ 1.888 + return FALSE; 1.889 + } 1.890 + 1.891 + pos=-1; 1.892 + for(i=0; i<count1; i++){ 1.893 + const UHashElement* elem1 = uhash_nextElement(hash1, &pos); 1.894 + const UHashTok key1 = elem1->key; 1.895 + const UHashTok val1 = elem1->value; 1.896 + /* here the keys are not compared, instead the key form hash1 is used to fetch 1.897 + * value from hash2. If the hashes are equal then then both hashes should 1.898 + * contain equal values for the same key! 1.899 + */ 1.900 + const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1)); 1.901 + const UHashTok val2 = elem2->value; 1.902 + if(hash1->valueComparator(val1, val2)==FALSE){ 1.903 + return FALSE; 1.904 + } 1.905 + } 1.906 + return TRUE; 1.907 +} 1.908 + 1.909 +/******************************************************************** 1.910 + * PUBLIC Comparator Functions 1.911 + ********************************************************************/ 1.912 + 1.913 +U_CAPI UBool U_EXPORT2 1.914 +uhash_compareUChars(const UHashTok key1, const UHashTok key2) { 1.915 + const UChar *p1 = (const UChar*) key1.pointer; 1.916 + const UChar *p2 = (const UChar*) key2.pointer; 1.917 + if (p1 == p2) { 1.918 + return TRUE; 1.919 + } 1.920 + if (p1 == NULL || p2 == NULL) { 1.921 + return FALSE; 1.922 + } 1.923 + while (*p1 != 0 && *p1 == *p2) { 1.924 + ++p1; 1.925 + ++p2; 1.926 + } 1.927 + return (UBool)(*p1 == *p2); 1.928 +} 1.929 + 1.930 +U_CAPI UBool U_EXPORT2 1.931 +uhash_compareChars(const UHashTok key1, const UHashTok key2) { 1.932 + const char *p1 = (const char*) key1.pointer; 1.933 + const char *p2 = (const char*) key2.pointer; 1.934 + if (p1 == p2) { 1.935 + return TRUE; 1.936 + } 1.937 + if (p1 == NULL || p2 == NULL) { 1.938 + return FALSE; 1.939 + } 1.940 + while (*p1 != 0 && *p1 == *p2) { 1.941 + ++p1; 1.942 + ++p2; 1.943 + } 1.944 + return (UBool)(*p1 == *p2); 1.945 +} 1.946 + 1.947 +U_CAPI UBool U_EXPORT2 1.948 +uhash_compareIChars(const UHashTok key1, const UHashTok key2) { 1.949 + const char *p1 = (const char*) key1.pointer; 1.950 + const char *p2 = (const char*) key2.pointer; 1.951 + if (p1 == p2) { 1.952 + return TRUE; 1.953 + } 1.954 + if (p1 == NULL || p2 == NULL) { 1.955 + return FALSE; 1.956 + } 1.957 + while (*p1 != 0 && uprv_tolower(*p1) == uprv_tolower(*p2)) { 1.958 + ++p1; 1.959 + ++p2; 1.960 + } 1.961 + return (UBool)(*p1 == *p2); 1.962 +} 1.963 + 1.964 +/******************************************************************** 1.965 + * PUBLIC int32_t Support Functions 1.966 + ********************************************************************/ 1.967 + 1.968 +U_CAPI int32_t U_EXPORT2 1.969 +uhash_hashLong(const UHashTok key) { 1.970 + return key.integer; 1.971 +} 1.972 + 1.973 +U_CAPI UBool U_EXPORT2 1.974 +uhash_compareLong(const UHashTok key1, const UHashTok key2) { 1.975 + return (UBool)(key1.integer == key2.integer); 1.976 +}