intl/icu/source/common/uhash.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/uhash.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,973 @@
     1.4 +/*
     1.5 +******************************************************************************
     1.6 +*   Copyright (C) 1997-2011, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +******************************************************************************
     1.9 +*   Date        Name        Description
    1.10 +*   03/22/00    aliu        Adapted from original C++ ICU Hashtable.
    1.11 +*   07/06/01    aliu        Modified to support int32_t keys on
    1.12 +*                           platforms with sizeof(void*) < 32.
    1.13 +******************************************************************************
    1.14 +*/
    1.15 +
    1.16 +#include "uhash.h"
    1.17 +#include "unicode/ustring.h"
    1.18 +#include "cstring.h"
    1.19 +#include "cmemory.h"
    1.20 +#include "uassert.h"
    1.21 +#include "ustr_imp.h"
    1.22 +
    1.23 +/* This hashtable is implemented as a double hash.  All elements are
    1.24 + * stored in a single array with no secondary storage for collision
    1.25 + * resolution (no linked list, etc.).  When there is a hash collision
    1.26 + * (when two unequal keys have the same hashcode) we resolve this by
    1.27 + * using a secondary hash.  The secondary hash is an increment
    1.28 + * computed as a hash function (a different one) of the primary
    1.29 + * hashcode.  This increment is added to the initial hash value to
    1.30 + * obtain further slots assigned to the same hash code.  For this to
    1.31 + * work, the length of the array and the increment must be relatively
    1.32 + * prime.  The easiest way to achieve this is to have the length of
    1.33 + * the array be prime, and the increment be any value from
    1.34 + * 1..length-1.
    1.35 + *
    1.36 + * Hashcodes are 32-bit integers.  We make sure all hashcodes are
    1.37 + * non-negative by masking off the top bit.  This has two effects: (1)
    1.38 + * modulo arithmetic is simplified.  If we allowed negative hashcodes,
    1.39 + * then when we computed hashcode % length, we could get a negative
    1.40 + * result, which we would then have to adjust back into range.  It's
    1.41 + * simpler to just make hashcodes non-negative. (2) It makes it easy
    1.42 + * to check for empty vs. occupied slots in the table.  We just mark
    1.43 + * empty or deleted slots with a negative hashcode.
    1.44 + *
    1.45 + * The central function is _uhash_find().  This function looks for a
    1.46 + * slot matching the given key and hashcode.  If one is found, it
    1.47 + * returns a pointer to that slot.  If the table is full, and no match
    1.48 + * is found, it returns NULL -- in theory.  This would make the code
    1.49 + * more complicated, since all callers of _uhash_find() would then
    1.50 + * have to check for a NULL result.  To keep this from happening, we
    1.51 + * don't allow the table to fill.  When there is only one
    1.52 + * empty/deleted slot left, uhash_put() will refuse to increase the
    1.53 + * count, and fail.  This simplifies the code.  In practice, one will
    1.54 + * seldom encounter this using default UHashtables.  However, if a
    1.55 + * hashtable is set to a U_FIXED resize policy, or if memory is
    1.56 + * exhausted, then the table may fill.
    1.57 + *
    1.58 + * High and low water ratios control rehashing.  They establish levels
    1.59 + * of fullness (from 0 to 1) outside of which the data array is
    1.60 + * reallocated and repopulated.  Setting the low water ratio to zero
    1.61 + * means the table will never shrink.  Setting the high water ratio to
    1.62 + * one means the table will never grow.  The ratios should be
    1.63 + * coordinated with the ratio between successive elements of the
    1.64 + * PRIMES table, so that when the primeIndex is incremented or
    1.65 + * decremented during rehashing, it brings the ratio of count / length
    1.66 + * back into the desired range (between low and high water ratios).
    1.67 + */
    1.68 +
    1.69 +/********************************************************************
    1.70 + * PRIVATE Constants, Macros
    1.71 + ********************************************************************/
    1.72 +
    1.73 +/* This is a list of non-consecutive primes chosen such that
    1.74 + * PRIMES[i+1] ~ 2*PRIMES[i].  (Currently, the ratio ranges from 1.81
    1.75 + * to 2.18; the inverse ratio ranges from 0.459 to 0.552.)  If this
    1.76 + * ratio is changed, the low and high water ratios should also be
    1.77 + * adjusted to suit.
    1.78 + *
    1.79 + * These prime numbers were also chosen so that they are the largest
    1.80 + * prime number while being less than a power of two.
    1.81 + */
    1.82 +static const int32_t PRIMES[] = {
    1.83 +    13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
    1.84 +    65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593,
    1.85 +    16777213, 33554393, 67108859, 134217689, 268435399, 536870909,
    1.86 +    1073741789, 2147483647 /*, 4294967291 */
    1.87 +};
    1.88 +
    1.89 +#define PRIMES_LENGTH (sizeof(PRIMES) / sizeof(PRIMES[0]))
    1.90 +#define DEFAULT_PRIME_INDEX 3
    1.91 +
    1.92 +/* These ratios are tuned to the PRIMES array such that a resize
    1.93 + * places the table back into the zone of non-resizing.  That is,
    1.94 + * after a call to _uhash_rehash(), a subsequent call to
    1.95 + * _uhash_rehash() should do nothing (should not churn).  This is only
    1.96 + * a potential problem with U_GROW_AND_SHRINK.
    1.97 + */
    1.98 +static const float RESIZE_POLICY_RATIO_TABLE[6] = {
    1.99 +    /* low, high water ratio */
   1.100 +    0.0F, 0.5F, /* U_GROW: Grow on demand, do not shrink */
   1.101 +    0.1F, 0.5F, /* U_GROW_AND_SHRINK: Grow and shrink on demand */
   1.102 +    0.0F, 1.0F  /* U_FIXED: Never change size */
   1.103 +};
   1.104 +
   1.105 +/*
   1.106 +  Invariants for hashcode values:
   1.107 +
   1.108 +  * DELETED < 0
   1.109 +  * EMPTY < 0
   1.110 +  * Real hashes >= 0
   1.111 +
   1.112 +  Hashcodes may not start out this way, but internally they are
   1.113 +  adjusted so that they are always positive.  We assume 32-bit
   1.114 +  hashcodes; adjust these constants for other hashcode sizes.
   1.115 +*/
   1.116 +#define HASH_DELETED    ((int32_t) 0x80000000)
   1.117 +#define HASH_EMPTY      ((int32_t) HASH_DELETED + 1)
   1.118 +
   1.119 +#define IS_EMPTY_OR_DELETED(x) ((x) < 0)
   1.120 +
   1.121 +/* This macro expects a UHashTok.pointer as its keypointer and
   1.122 +   valuepointer parameters */
   1.123 +#define HASH_DELETE_KEY_VALUE(hash, keypointer, valuepointer) \
   1.124 +            if (hash->keyDeleter != NULL && keypointer != NULL) { \
   1.125 +                (*hash->keyDeleter)(keypointer); \
   1.126 +            } \
   1.127 +            if (hash->valueDeleter != NULL && valuepointer != NULL) { \
   1.128 +                (*hash->valueDeleter)(valuepointer); \
   1.129 +            }
   1.130 +
   1.131 +/*
   1.132 + * Constants for hinting whether a key or value is an integer
   1.133 + * or a pointer.  If a hint bit is zero, then the associated
   1.134 + * token is assumed to be an integer.
   1.135 + */
   1.136 +#define HINT_KEY_POINTER   (1)
   1.137 +#define HINT_VALUE_POINTER (2)
   1.138 +
   1.139 +/********************************************************************
   1.140 + * PRIVATE Implementation
   1.141 + ********************************************************************/
   1.142 +
   1.143 +static UHashTok
   1.144 +_uhash_setElement(UHashtable *hash, UHashElement* e,
   1.145 +                  int32_t hashcode,
   1.146 +                  UHashTok key, UHashTok value, int8_t hint) {
   1.147 +
   1.148 +    UHashTok oldValue = e->value;
   1.149 +    if (hash->keyDeleter != NULL && e->key.pointer != NULL &&
   1.150 +        e->key.pointer != key.pointer) { /* Avoid double deletion */
   1.151 +        (*hash->keyDeleter)(e->key.pointer);
   1.152 +    }
   1.153 +    if (hash->valueDeleter != NULL) {
   1.154 +        if (oldValue.pointer != NULL &&
   1.155 +            oldValue.pointer != value.pointer) { /* Avoid double deletion */
   1.156 +            (*hash->valueDeleter)(oldValue.pointer);
   1.157 +        }
   1.158 +        oldValue.pointer = NULL;
   1.159 +    }
   1.160 +    /* Compilers should copy the UHashTok union correctly, but even if
   1.161 +     * they do, memory heap tools (e.g. BoundsChecker) can get
   1.162 +     * confused when a pointer is cloaked in a union and then copied.
   1.163 +     * TO ALLEVIATE THIS, we use hints (based on what API the user is
   1.164 +     * calling) to copy pointers when we know the user thinks
   1.165 +     * something is a pointer. */
   1.166 +    if (hint & HINT_KEY_POINTER) {
   1.167 +        e->key.pointer = key.pointer;
   1.168 +    } else {
   1.169 +        e->key = key;
   1.170 +    }
   1.171 +    if (hint & HINT_VALUE_POINTER) {
   1.172 +        e->value.pointer = value.pointer;
   1.173 +    } else {
   1.174 +        e->value = value;
   1.175 +    }
   1.176 +    e->hashcode = hashcode;
   1.177 +    return oldValue;
   1.178 +}
   1.179 +
   1.180 +/**
   1.181 + * Assumes that the given element is not empty or deleted.
   1.182 + */
   1.183 +static UHashTok
   1.184 +_uhash_internalRemoveElement(UHashtable *hash, UHashElement* e) {
   1.185 +    UHashTok empty;
   1.186 +    U_ASSERT(!IS_EMPTY_OR_DELETED(e->hashcode));
   1.187 +    --hash->count;
   1.188 +    empty.pointer = NULL; empty.integer = 0;
   1.189 +    return _uhash_setElement(hash, e, HASH_DELETED, empty, empty, 0);
   1.190 +}
   1.191 +
   1.192 +static void
   1.193 +_uhash_internalSetResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
   1.194 +    U_ASSERT(hash != NULL);
   1.195 +    U_ASSERT(((int32_t)policy) >= 0);
   1.196 +    U_ASSERT(((int32_t)policy) < 3);
   1.197 +    hash->lowWaterRatio  = RESIZE_POLICY_RATIO_TABLE[policy * 2];
   1.198 +    hash->highWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2 + 1];
   1.199 +}
   1.200 +
   1.201 +/**
   1.202 + * Allocate internal data array of a size determined by the given
   1.203 + * prime index.  If the index is out of range it is pinned into range.
   1.204 + * If the allocation fails the status is set to
   1.205 + * U_MEMORY_ALLOCATION_ERROR and all array storage is freed.  In
   1.206 + * either case the previous array pointer is overwritten.
   1.207 + *
   1.208 + * Caller must ensure primeIndex is in range 0..PRIME_LENGTH-1.
   1.209 + */
   1.210 +static void
   1.211 +_uhash_allocate(UHashtable *hash,
   1.212 +                int32_t primeIndex,
   1.213 +                UErrorCode *status) {
   1.214 +
   1.215 +    UHashElement *p, *limit;
   1.216 +    UHashTok emptytok;
   1.217 +
   1.218 +    if (U_FAILURE(*status)) return;
   1.219 +
   1.220 +    U_ASSERT(primeIndex >= 0 && primeIndex < PRIMES_LENGTH);
   1.221 +
   1.222 +    hash->primeIndex = primeIndex;
   1.223 +    hash->length = PRIMES[primeIndex];
   1.224 +
   1.225 +    p = hash->elements = (UHashElement*)
   1.226 +        uprv_malloc(sizeof(UHashElement) * hash->length);
   1.227 +
   1.228 +    if (hash->elements == NULL) {
   1.229 +        *status = U_MEMORY_ALLOCATION_ERROR;
   1.230 +        return;
   1.231 +    }
   1.232 +
   1.233 +    emptytok.pointer = NULL; /* Only one of these two is needed */
   1.234 +    emptytok.integer = 0;    /* but we don't know which one. */
   1.235 +    
   1.236 +    limit = p + hash->length;
   1.237 +    while (p < limit) {
   1.238 +        p->key = emptytok;
   1.239 +        p->value = emptytok;
   1.240 +        p->hashcode = HASH_EMPTY;
   1.241 +        ++p;
   1.242 +    }
   1.243 +
   1.244 +    hash->count = 0;
   1.245 +    hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio);
   1.246 +    hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
   1.247 +}
   1.248 +
   1.249 +static UHashtable*
   1.250 +_uhash_init(UHashtable *result,
   1.251 +              UHashFunction *keyHash, 
   1.252 +              UKeyComparator *keyComp,
   1.253 +              UValueComparator *valueComp,
   1.254 +              int32_t primeIndex,
   1.255 +              UErrorCode *status)
   1.256 +{
   1.257 +    if (U_FAILURE(*status)) return NULL;
   1.258 +    U_ASSERT(keyHash != NULL);
   1.259 +    U_ASSERT(keyComp != NULL);
   1.260 +
   1.261 +    result->keyHasher       = keyHash;
   1.262 +    result->keyComparator   = keyComp;
   1.263 +    result->valueComparator = valueComp;
   1.264 +    result->keyDeleter      = NULL;
   1.265 +    result->valueDeleter    = NULL;
   1.266 +    result->allocated       = FALSE;
   1.267 +    _uhash_internalSetResizePolicy(result, U_GROW);
   1.268 +
   1.269 +    _uhash_allocate(result, primeIndex, status);
   1.270 +
   1.271 +    if (U_FAILURE(*status)) {
   1.272 +        return NULL;
   1.273 +    }
   1.274 +
   1.275 +    return result;
   1.276 +}
   1.277 +
   1.278 +static UHashtable*
   1.279 +_uhash_create(UHashFunction *keyHash, 
   1.280 +              UKeyComparator *keyComp,
   1.281 +              UValueComparator *valueComp,
   1.282 +              int32_t primeIndex,
   1.283 +              UErrorCode *status) {
   1.284 +    UHashtable *result;
   1.285 +
   1.286 +    if (U_FAILURE(*status)) return NULL;
   1.287 +
   1.288 +    result = (UHashtable*) uprv_malloc(sizeof(UHashtable));
   1.289 +    if (result == NULL) {
   1.290 +        *status = U_MEMORY_ALLOCATION_ERROR;
   1.291 +        return NULL;
   1.292 +    }
   1.293 +
   1.294 +    _uhash_init(result, keyHash, keyComp, valueComp, primeIndex, status);
   1.295 +    result->allocated       = TRUE;
   1.296 +
   1.297 +    if (U_FAILURE(*status)) {
   1.298 +        uprv_free(result);
   1.299 +        return NULL;
   1.300 +    }
   1.301 +
   1.302 +    return result;
   1.303 +}
   1.304 +
   1.305 +/**
   1.306 + * Look for a key in the table, or if no such key exists, the first
   1.307 + * empty slot matching the given hashcode.  Keys are compared using
   1.308 + * the keyComparator function.
   1.309 + *
   1.310 + * First find the start position, which is the hashcode modulo
   1.311 + * the length.  Test it to see if it is:
   1.312 + *
   1.313 + * a. identical:  First check the hash values for a quick check,
   1.314 + *    then compare keys for equality using keyComparator.
   1.315 + * b. deleted
   1.316 + * c. empty
   1.317 + *
   1.318 + * Stop if it is identical or empty, otherwise continue by adding a
   1.319 + * "jump" value (moduloing by the length again to keep it within
   1.320 + * range) and retesting.  For efficiency, there need enough empty
   1.321 + * values so that the searchs stop within a reasonable amount of time.
   1.322 + * This can be changed by changing the high/low water marks.
   1.323 + *
   1.324 + * In theory, this function can return NULL, if it is full (no empty
   1.325 + * or deleted slots) and if no matching key is found.  In practice, we
   1.326 + * prevent this elsewhere (in uhash_put) by making sure the last slot
   1.327 + * in the table is never filled.
   1.328 + *
   1.329 + * The size of the table should be prime for this algorithm to work;
   1.330 + * otherwise we are not guaranteed that the jump value (the secondary
   1.331 + * hash) is relatively prime to the table length.
   1.332 + */
   1.333 +static UHashElement*
   1.334 +_uhash_find(const UHashtable *hash, UHashTok key,
   1.335 +            int32_t hashcode) {
   1.336 +
   1.337 +    int32_t firstDeleted = -1;  /* assume invalid index */
   1.338 +    int32_t theIndex, startIndex;
   1.339 +    int32_t jump = 0; /* lazy evaluate */
   1.340 +    int32_t tableHash;
   1.341 +    UHashElement *elements = hash->elements;
   1.342 +
   1.343 +    hashcode &= 0x7FFFFFFF; /* must be positive */
   1.344 +    startIndex = theIndex = (hashcode ^ 0x4000000) % hash->length;
   1.345 +
   1.346 +    do {
   1.347 +        tableHash = elements[theIndex].hashcode;
   1.348 +        if (tableHash == hashcode) {          /* quick check */
   1.349 +            if ((*hash->keyComparator)(key, elements[theIndex].key)) {
   1.350 +                return &(elements[theIndex]);
   1.351 +            }
   1.352 +        } else if (!IS_EMPTY_OR_DELETED(tableHash)) {
   1.353 +            /* We have hit a slot which contains a key-value pair,
   1.354 +             * but for which the hash code does not match.  Keep
   1.355 +             * looking.
   1.356 +             */
   1.357 +        } else if (tableHash == HASH_EMPTY) { /* empty, end o' the line */
   1.358 +            break;
   1.359 +        } else if (firstDeleted < 0) { /* remember first deleted */
   1.360 +            firstDeleted = theIndex;
   1.361 +        }
   1.362 +        if (jump == 0) { /* lazy compute jump */
   1.363 +            /* The jump value must be relatively prime to the table
   1.364 +             * length.  As long as the length is prime, then any value
   1.365 +             * 1..length-1 will be relatively prime to it.
   1.366 +             */
   1.367 +            jump = (hashcode % (hash->length - 1)) + 1;
   1.368 +        }
   1.369 +        theIndex = (theIndex + jump) % hash->length;
   1.370 +    } while (theIndex != startIndex);
   1.371 +
   1.372 +    if (firstDeleted >= 0) {
   1.373 +        theIndex = firstDeleted; /* reset if had deleted slot */
   1.374 +    } else if (tableHash != HASH_EMPTY) {
   1.375 +        /* We get to this point if the hashtable is full (no empty or
   1.376 +         * deleted slots), and we've failed to find a match.  THIS
   1.377 +         * WILL NEVER HAPPEN as long as uhash_put() makes sure that
   1.378 +         * count is always < length.
   1.379 +         */
   1.380 +        U_ASSERT(FALSE);
   1.381 +        return NULL; /* Never happens if uhash_put() behaves */
   1.382 +    }
   1.383 +    return &(elements[theIndex]);
   1.384 +}
   1.385 +
   1.386 +/**
   1.387 + * Attempt to grow or shrink the data arrays in order to make the
   1.388 + * count fit between the high and low water marks.  hash_put() and
   1.389 + * hash_remove() call this method when the count exceeds the high or
   1.390 + * low water marks.  This method may do nothing, if memory allocation
   1.391 + * fails, or if the count is already in range, or if the length is
   1.392 + * already at the low or high limit.  In any case, upon return the
   1.393 + * arrays will be valid.
   1.394 + */
   1.395 +static void
   1.396 +_uhash_rehash(UHashtable *hash, UErrorCode *status) {
   1.397 +
   1.398 +    UHashElement *old = hash->elements;
   1.399 +    int32_t oldLength = hash->length;
   1.400 +    int32_t newPrimeIndex = hash->primeIndex;
   1.401 +    int32_t i;
   1.402 +
   1.403 +    if (hash->count > hash->highWaterMark) {
   1.404 +        if (++newPrimeIndex >= PRIMES_LENGTH) {
   1.405 +            return;
   1.406 +        }
   1.407 +    } else if (hash->count < hash->lowWaterMark) {
   1.408 +        if (--newPrimeIndex < 0) {
   1.409 +            return;
   1.410 +        }
   1.411 +    } else {
   1.412 +        return;
   1.413 +    }
   1.414 +
   1.415 +    _uhash_allocate(hash, newPrimeIndex, status);
   1.416 +
   1.417 +    if (U_FAILURE(*status)) {
   1.418 +        hash->elements = old;
   1.419 +        hash->length = oldLength;       
   1.420 +        return;
   1.421 +    }
   1.422 +
   1.423 +    for (i = oldLength - 1; i >= 0; --i) {
   1.424 +        if (!IS_EMPTY_OR_DELETED(old[i].hashcode)) {
   1.425 +            UHashElement *e = _uhash_find(hash, old[i].key, old[i].hashcode);
   1.426 +            U_ASSERT(e != NULL);
   1.427 +            U_ASSERT(e->hashcode == HASH_EMPTY);
   1.428 +            e->key = old[i].key;
   1.429 +            e->value = old[i].value;
   1.430 +            e->hashcode = old[i].hashcode;
   1.431 +            ++hash->count;
   1.432 +        }
   1.433 +    }
   1.434 +
   1.435 +    uprv_free(old);
   1.436 +}
   1.437 +
   1.438 +static UHashTok
   1.439 +_uhash_remove(UHashtable *hash,
   1.440 +              UHashTok key) {
   1.441 +    /* First find the position of the key in the table.  If the object
   1.442 +     * has not been removed already, remove it.  If the user wanted
   1.443 +     * keys deleted, then delete it also.  We have to put a special
   1.444 +     * hashcode in that position that means that something has been
   1.445 +     * deleted, since when we do a find, we have to continue PAST any
   1.446 +     * deleted values.
   1.447 +     */
   1.448 +    UHashTok result;
   1.449 +    UHashElement* e = _uhash_find(hash, key, hash->keyHasher(key));
   1.450 +    U_ASSERT(e != NULL);
   1.451 +    result.pointer = NULL;
   1.452 +    result.integer = 0;
   1.453 +    if (!IS_EMPTY_OR_DELETED(e->hashcode)) {
   1.454 +        result = _uhash_internalRemoveElement(hash, e);
   1.455 +        if (hash->count < hash->lowWaterMark) {
   1.456 +            UErrorCode status = U_ZERO_ERROR;
   1.457 +            _uhash_rehash(hash, &status);
   1.458 +        }
   1.459 +    }
   1.460 +    return result;
   1.461 +}
   1.462 +
   1.463 +static UHashTok
   1.464 +_uhash_put(UHashtable *hash,
   1.465 +           UHashTok key,
   1.466 +           UHashTok value,
   1.467 +           int8_t hint,
   1.468 +           UErrorCode *status) {
   1.469 +
   1.470 +    /* Put finds the position in the table for the new value.  If the
   1.471 +     * key is already in the table, it is deleted, if there is a
   1.472 +     * non-NULL keyDeleter.  Then the key, the hash and the value are
   1.473 +     * all put at the position in their respective arrays.
   1.474 +     */
   1.475 +    int32_t hashcode;
   1.476 +    UHashElement* e;
   1.477 +    UHashTok emptytok;
   1.478 +
   1.479 +    if (U_FAILURE(*status)) {
   1.480 +        goto err;
   1.481 +    }
   1.482 +    U_ASSERT(hash != NULL);
   1.483 +    /* Cannot always check pointer here or iSeries sees NULL every time. */
   1.484 +    if ((hint & HINT_VALUE_POINTER) && value.pointer == NULL) {
   1.485 +        /* Disallow storage of NULL values, since NULL is returned by
   1.486 +         * get() to indicate an absent key.  Storing NULL == removing.
   1.487 +         */
   1.488 +        return _uhash_remove(hash, key);
   1.489 +    }
   1.490 +    if (hash->count > hash->highWaterMark) {
   1.491 +        _uhash_rehash(hash, status);
   1.492 +        if (U_FAILURE(*status)) {
   1.493 +            goto err;
   1.494 +        }
   1.495 +    }
   1.496 +
   1.497 +    hashcode = (*hash->keyHasher)(key);
   1.498 +    e = _uhash_find(hash, key, hashcode);
   1.499 +    U_ASSERT(e != NULL);
   1.500 +
   1.501 +    if (IS_EMPTY_OR_DELETED(e->hashcode)) {
   1.502 +        /* Important: We must never actually fill the table up.  If we
   1.503 +         * do so, then _uhash_find() will return NULL, and we'll have
   1.504 +         * to check for NULL after every call to _uhash_find().  To
   1.505 +         * avoid this we make sure there is always at least one empty
   1.506 +         * or deleted slot in the table.  This only is a problem if we
   1.507 +         * are out of memory and rehash isn't working.
   1.508 +         */
   1.509 +        ++hash->count;
   1.510 +        if (hash->count == hash->length) {
   1.511 +            /* Don't allow count to reach length */
   1.512 +            --hash->count;
   1.513 +            *status = U_MEMORY_ALLOCATION_ERROR;
   1.514 +            goto err;
   1.515 +        }
   1.516 +    }
   1.517 +
   1.518 +    /* We must in all cases handle storage properly.  If there was an
   1.519 +     * old key, then it must be deleted (if the deleter != NULL).
   1.520 +     * Make hashcodes stored in table positive.
   1.521 +     */
   1.522 +    return _uhash_setElement(hash, e, hashcode & 0x7FFFFFFF, key, value, hint);
   1.523 +
   1.524 + err:
   1.525 +    /* If the deleters are non-NULL, this method adopts its key and/or
   1.526 +     * value arguments, and we must be sure to delete the key and/or
   1.527 +     * value in all cases, even upon failure.
   1.528 +     */
   1.529 +    HASH_DELETE_KEY_VALUE(hash, key.pointer, value.pointer);
   1.530 +    emptytok.pointer = NULL; emptytok.integer = 0;
   1.531 +    return emptytok;
   1.532 +}
   1.533 +
   1.534 +
   1.535 +/********************************************************************
   1.536 + * PUBLIC API
   1.537 + ********************************************************************/
   1.538 +
   1.539 +U_CAPI UHashtable* U_EXPORT2
   1.540 +uhash_open(UHashFunction *keyHash, 
   1.541 +           UKeyComparator *keyComp,
   1.542 +           UValueComparator *valueComp,
   1.543 +           UErrorCode *status) {
   1.544 +
   1.545 +    return _uhash_create(keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
   1.546 +}
   1.547 +
   1.548 +U_CAPI UHashtable* U_EXPORT2
   1.549 +uhash_openSize(UHashFunction *keyHash, 
   1.550 +               UKeyComparator *keyComp,
   1.551 +               UValueComparator *valueComp,
   1.552 +               int32_t size,
   1.553 +               UErrorCode *status) {
   1.554 +
   1.555 +    /* Find the smallest index i for which PRIMES[i] >= size. */
   1.556 +    int32_t i = 0;
   1.557 +    while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) {
   1.558 +        ++i;
   1.559 +    }
   1.560 +
   1.561 +    return _uhash_create(keyHash, keyComp, valueComp, i, status);
   1.562 +}
   1.563 +
   1.564 +U_CAPI UHashtable* U_EXPORT2
   1.565 +uhash_init(UHashtable *fillinResult,
   1.566 +           UHashFunction *keyHash, 
   1.567 +           UKeyComparator *keyComp,
   1.568 +           UValueComparator *valueComp,
   1.569 +           UErrorCode *status) {
   1.570 +
   1.571 +    return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
   1.572 +}
   1.573 +
   1.574 +U_CAPI void U_EXPORT2
   1.575 +uhash_close(UHashtable *hash) {
   1.576 +    if (hash == NULL) {
   1.577 +        return;
   1.578 +    }
   1.579 +    if (hash->elements != NULL) {
   1.580 +        if (hash->keyDeleter != NULL || hash->valueDeleter != NULL) {
   1.581 +            int32_t pos=-1;
   1.582 +            UHashElement *e;
   1.583 +            while ((e = (UHashElement*) uhash_nextElement(hash, &pos)) != NULL) {
   1.584 +                HASH_DELETE_KEY_VALUE(hash, e->key.pointer, e->value.pointer);
   1.585 +            }
   1.586 +        }
   1.587 +        uprv_free(hash->elements);
   1.588 +        hash->elements = NULL;
   1.589 +    }
   1.590 +    if (hash->allocated) {
   1.591 +        uprv_free(hash);
   1.592 +    }
   1.593 +}
   1.594 +
   1.595 +U_CAPI UHashFunction *U_EXPORT2
   1.596 +uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn) {
   1.597 +    UHashFunction *result = hash->keyHasher;
   1.598 +    hash->keyHasher = fn;
   1.599 +    return result;
   1.600 +}
   1.601 +
   1.602 +U_CAPI UKeyComparator *U_EXPORT2
   1.603 +uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) {
   1.604 +    UKeyComparator *result = hash->keyComparator;
   1.605 +    hash->keyComparator = fn;
   1.606 +    return result;
   1.607 +}
   1.608 +U_CAPI UValueComparator *U_EXPORT2 
   1.609 +uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){
   1.610 +    UValueComparator *result = hash->valueComparator;
   1.611 +    hash->valueComparator = fn;
   1.612 +    return result;
   1.613 +}
   1.614 +
   1.615 +U_CAPI UObjectDeleter *U_EXPORT2
   1.616 +uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn) {
   1.617 +    UObjectDeleter *result = hash->keyDeleter;
   1.618 +    hash->keyDeleter = fn;
   1.619 +    return result;
   1.620 +}
   1.621 +
   1.622 +U_CAPI UObjectDeleter *U_EXPORT2
   1.623 +uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn) {
   1.624 +    UObjectDeleter *result = hash->valueDeleter;
   1.625 +    hash->valueDeleter = fn;
   1.626 +    return result;
   1.627 +}
   1.628 +
   1.629 +U_CAPI void U_EXPORT2
   1.630 +uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
   1.631 +    UErrorCode status = U_ZERO_ERROR;
   1.632 +    _uhash_internalSetResizePolicy(hash, policy);
   1.633 +    hash->lowWaterMark  = (int32_t)(hash->length * hash->lowWaterRatio);
   1.634 +    hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);    
   1.635 +    _uhash_rehash(hash, &status);
   1.636 +}
   1.637 +
   1.638 +U_CAPI int32_t U_EXPORT2
   1.639 +uhash_count(const UHashtable *hash) {
   1.640 +    return hash->count;
   1.641 +}
   1.642 +
   1.643 +U_CAPI void* U_EXPORT2
   1.644 +uhash_get(const UHashtable *hash,
   1.645 +          const void* key) {
   1.646 +    UHashTok keyholder;
   1.647 +    keyholder.pointer = (void*) key;
   1.648 +    return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer;
   1.649 +}
   1.650 +
   1.651 +U_CAPI void* U_EXPORT2
   1.652 +uhash_iget(const UHashtable *hash,
   1.653 +           int32_t key) {
   1.654 +    UHashTok keyholder;
   1.655 +    keyholder.integer = key;
   1.656 +    return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer;
   1.657 +}
   1.658 +
   1.659 +U_CAPI int32_t U_EXPORT2
   1.660 +uhash_geti(const UHashtable *hash,
   1.661 +           const void* key) {
   1.662 +    UHashTok keyholder;
   1.663 +    keyholder.pointer = (void*) key;
   1.664 +    return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
   1.665 +}
   1.666 +
   1.667 +U_CAPI int32_t U_EXPORT2
   1.668 +uhash_igeti(const UHashtable *hash,
   1.669 +           int32_t key) {
   1.670 +    UHashTok keyholder;
   1.671 +    keyholder.integer = key;
   1.672 +    return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
   1.673 +}
   1.674 +
   1.675 +U_CAPI void* U_EXPORT2
   1.676 +uhash_put(UHashtable *hash,
   1.677 +          void* key,
   1.678 +          void* value,
   1.679 +          UErrorCode *status) {
   1.680 +    UHashTok keyholder, valueholder;
   1.681 +    keyholder.pointer = key;
   1.682 +    valueholder.pointer = value;
   1.683 +    return _uhash_put(hash, keyholder, valueholder,
   1.684 +                      HINT_KEY_POINTER | HINT_VALUE_POINTER,
   1.685 +                      status).pointer;
   1.686 +}
   1.687 +
   1.688 +U_CAPI void* U_EXPORT2
   1.689 +uhash_iput(UHashtable *hash,
   1.690 +           int32_t key,
   1.691 +           void* value,
   1.692 +           UErrorCode *status) {
   1.693 +    UHashTok keyholder, valueholder;
   1.694 +    keyholder.integer = key;
   1.695 +    valueholder.pointer = value;
   1.696 +    return _uhash_put(hash, keyholder, valueholder,
   1.697 +                      HINT_VALUE_POINTER,
   1.698 +                      status).pointer;
   1.699 +}
   1.700 +
   1.701 +U_CAPI int32_t U_EXPORT2
   1.702 +uhash_puti(UHashtable *hash,
   1.703 +           void* key,
   1.704 +           int32_t value,
   1.705 +           UErrorCode *status) {
   1.706 +    UHashTok keyholder, valueholder;
   1.707 +    keyholder.pointer = key;
   1.708 +    valueholder.integer = value;
   1.709 +    return _uhash_put(hash, keyholder, valueholder,
   1.710 +                      HINT_KEY_POINTER,
   1.711 +                      status).integer;
   1.712 +}
   1.713 +
   1.714 +
   1.715 +U_CAPI int32_t U_EXPORT2
   1.716 +uhash_iputi(UHashtable *hash,
   1.717 +           int32_t key,
   1.718 +           int32_t value,
   1.719 +           UErrorCode *status) {
   1.720 +    UHashTok keyholder, valueholder;
   1.721 +    keyholder.integer = key;
   1.722 +    valueholder.integer = value;
   1.723 +    return _uhash_put(hash, keyholder, valueholder,
   1.724 +                      0, /* neither is a ptr */
   1.725 +                      status).integer;
   1.726 +}
   1.727 +
   1.728 +U_CAPI void* U_EXPORT2
   1.729 +uhash_remove(UHashtable *hash,
   1.730 +             const void* key) {
   1.731 +    UHashTok keyholder;
   1.732 +    keyholder.pointer = (void*) key;
   1.733 +    return _uhash_remove(hash, keyholder).pointer;
   1.734 +}
   1.735 +
   1.736 +U_CAPI void* U_EXPORT2
   1.737 +uhash_iremove(UHashtable *hash,
   1.738 +              int32_t key) {
   1.739 +    UHashTok keyholder;
   1.740 +    keyholder.integer = key;
   1.741 +    return _uhash_remove(hash, keyholder).pointer;
   1.742 +}
   1.743 +
   1.744 +U_CAPI int32_t U_EXPORT2
   1.745 +uhash_removei(UHashtable *hash,
   1.746 +              const void* key) {
   1.747 +    UHashTok keyholder;
   1.748 +    keyholder.pointer = (void*) key;
   1.749 +    return _uhash_remove(hash, keyholder).integer;
   1.750 +}
   1.751 +
   1.752 +U_CAPI int32_t U_EXPORT2
   1.753 +uhash_iremovei(UHashtable *hash,
   1.754 +               int32_t key) {
   1.755 +    UHashTok keyholder;
   1.756 +    keyholder.integer = key;
   1.757 +    return _uhash_remove(hash, keyholder).integer;
   1.758 +}
   1.759 +
   1.760 +U_CAPI void U_EXPORT2
   1.761 +uhash_removeAll(UHashtable *hash) {
   1.762 +    int32_t pos = -1;
   1.763 +    const UHashElement *e;
   1.764 +    U_ASSERT(hash != NULL);
   1.765 +    if (hash->count != 0) {
   1.766 +        while ((e = uhash_nextElement(hash, &pos)) != NULL) {
   1.767 +            uhash_removeElement(hash, e);
   1.768 +        }
   1.769 +    }
   1.770 +    U_ASSERT(hash->count == 0);
   1.771 +}
   1.772 +
   1.773 +U_CAPI const UHashElement* U_EXPORT2
   1.774 +uhash_find(const UHashtable *hash, const void* key) {
   1.775 +    UHashTok keyholder;
   1.776 +    const UHashElement *e;
   1.777 +    keyholder.pointer = (void*) key;
   1.778 +    e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
   1.779 +    return IS_EMPTY_OR_DELETED(e->hashcode) ? NULL : e;
   1.780 +}
   1.781 +
   1.782 +U_CAPI const UHashElement* U_EXPORT2
   1.783 +uhash_nextElement(const UHashtable *hash, int32_t *pos) {
   1.784 +    /* Walk through the array until we find an element that is not
   1.785 +     * EMPTY and not DELETED.
   1.786 +     */
   1.787 +    int32_t i;
   1.788 +    U_ASSERT(hash != NULL);
   1.789 +    for (i = *pos + 1; i < hash->length; ++i) {
   1.790 +        if (!IS_EMPTY_OR_DELETED(hash->elements[i].hashcode)) {
   1.791 +            *pos = i;
   1.792 +            return &(hash->elements[i]);
   1.793 +        }
   1.794 +    }
   1.795 +
   1.796 +    /* No more elements */
   1.797 +    return NULL;
   1.798 +}
   1.799 +
   1.800 +U_CAPI void* U_EXPORT2
   1.801 +uhash_removeElement(UHashtable *hash, const UHashElement* e) {
   1.802 +    U_ASSERT(hash != NULL);
   1.803 +    U_ASSERT(e != NULL);
   1.804 +    if (!IS_EMPTY_OR_DELETED(e->hashcode)) {
   1.805 +        UHashElement *nce = (UHashElement *)e;
   1.806 +        return _uhash_internalRemoveElement(hash, nce).pointer;
   1.807 +    }
   1.808 +    return NULL;
   1.809 +}
   1.810 +
   1.811 +/********************************************************************
   1.812 + * UHashTok convenience
   1.813 + ********************************************************************/
   1.814 +
   1.815 +/**
   1.816 + * Return a UHashTok for an integer.
   1.817 + */
   1.818 +/*U_CAPI UHashTok U_EXPORT2
   1.819 +uhash_toki(int32_t i) {
   1.820 +    UHashTok tok;
   1.821 +    tok.integer = i;
   1.822 +    return tok;
   1.823 +}*/
   1.824 +
   1.825 +/**
   1.826 + * Return a UHashTok for a pointer.
   1.827 + */
   1.828 +/*U_CAPI UHashTok U_EXPORT2
   1.829 +uhash_tokp(void* p) {
   1.830 +    UHashTok tok;
   1.831 +    tok.pointer = p;
   1.832 +    return tok;
   1.833 +}*/
   1.834 +
   1.835 +/********************************************************************
   1.836 + * PUBLIC Key Hash Functions
   1.837 + ********************************************************************/
   1.838 +
   1.839 +U_CAPI int32_t U_EXPORT2
   1.840 +uhash_hashUChars(const UHashTok key) {
   1.841 +    const UChar *s = (const UChar *)key.pointer;
   1.842 +    return s == NULL ? 0 : ustr_hashUCharsN(s, u_strlen(s));
   1.843 +}
   1.844 +
   1.845 +U_CAPI int32_t U_EXPORT2
   1.846 +uhash_hashChars(const UHashTok key) {
   1.847 +    const char *s = (const char *)key.pointer;
   1.848 +    return s == NULL ? 0 : ustr_hashCharsN(s, uprv_strlen(s));
   1.849 +}
   1.850 +
   1.851 +U_CAPI int32_t U_EXPORT2
   1.852 +uhash_hashIChars(const UHashTok key) {
   1.853 +    const char *s = (const char *)key.pointer;
   1.854 +    return s == NULL ? 0 : ustr_hashICharsN(s, uprv_strlen(s));
   1.855 +}
   1.856 +
   1.857 +U_CAPI UBool U_EXPORT2 
   1.858 +uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
   1.859 +    int32_t count1, count2, pos, i;
   1.860 +
   1.861 +    if(hash1==hash2){
   1.862 +        return TRUE;
   1.863 +    }
   1.864 +
   1.865 +    /*
   1.866 +     * Make sure that we are comparing 2 valid hashes of the same type
   1.867 +     * with valid comparison functions.
   1.868 +     * Without valid comparison functions, a binary comparison
   1.869 +     * of the hash values will yield random results on machines
   1.870 +     * with 64-bit pointers and 32-bit integer hashes.
   1.871 +     * A valueComparator is normally optional.
   1.872 +     */
   1.873 +    if (hash1==NULL || hash2==NULL ||
   1.874 +        hash1->keyComparator != hash2->keyComparator ||
   1.875 +        hash1->valueComparator != hash2->valueComparator ||
   1.876 +        hash1->valueComparator == NULL)
   1.877 +    {
   1.878 +        /*
   1.879 +        Normally we would return an error here about incompatible hash tables,
   1.880 +        but we return FALSE instead.
   1.881 +        */
   1.882 +        return FALSE;
   1.883 +    }
   1.884 +
   1.885 +    count1 = uhash_count(hash1);
   1.886 +    count2 = uhash_count(hash2);
   1.887 +    if(count1!=count2){
   1.888 +        return FALSE;
   1.889 +    }
   1.890 +    
   1.891 +    pos=-1;
   1.892 +    for(i=0; i<count1; i++){
   1.893 +        const UHashElement* elem1 = uhash_nextElement(hash1, &pos);
   1.894 +        const UHashTok key1 = elem1->key;
   1.895 +        const UHashTok val1 = elem1->value;
   1.896 +        /* here the keys are not compared, instead the key form hash1 is used to fetch
   1.897 +         * value from hash2. If the hashes are equal then then both hashes should 
   1.898 +         * contain equal values for the same key!
   1.899 +         */
   1.900 +        const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1));
   1.901 +        const UHashTok val2 = elem2->value;
   1.902 +        if(hash1->valueComparator(val1, val2)==FALSE){
   1.903 +            return FALSE;
   1.904 +        }
   1.905 +    }
   1.906 +    return TRUE;
   1.907 +}
   1.908 +
   1.909 +/********************************************************************
   1.910 + * PUBLIC Comparator Functions
   1.911 + ********************************************************************/
   1.912 +
   1.913 +U_CAPI UBool U_EXPORT2
   1.914 +uhash_compareUChars(const UHashTok key1, const UHashTok key2) {
   1.915 +    const UChar *p1 = (const UChar*) key1.pointer;
   1.916 +    const UChar *p2 = (const UChar*) key2.pointer;
   1.917 +    if (p1 == p2) {
   1.918 +        return TRUE;
   1.919 +    }
   1.920 +    if (p1 == NULL || p2 == NULL) {
   1.921 +        return FALSE;
   1.922 +    }
   1.923 +    while (*p1 != 0 && *p1 == *p2) {
   1.924 +        ++p1;
   1.925 +        ++p2;
   1.926 +    }
   1.927 +    return (UBool)(*p1 == *p2);
   1.928 +}
   1.929 +
   1.930 +U_CAPI UBool U_EXPORT2
   1.931 +uhash_compareChars(const UHashTok key1, const UHashTok key2) {
   1.932 +    const char *p1 = (const char*) key1.pointer;
   1.933 +    const char *p2 = (const char*) key2.pointer;
   1.934 +    if (p1 == p2) {
   1.935 +        return TRUE;
   1.936 +    }
   1.937 +    if (p1 == NULL || p2 == NULL) {
   1.938 +        return FALSE;
   1.939 +    }
   1.940 +    while (*p1 != 0 && *p1 == *p2) {
   1.941 +        ++p1;
   1.942 +        ++p2;
   1.943 +    }
   1.944 +    return (UBool)(*p1 == *p2);
   1.945 +}
   1.946 +
   1.947 +U_CAPI UBool U_EXPORT2
   1.948 +uhash_compareIChars(const UHashTok key1, const UHashTok key2) {
   1.949 +    const char *p1 = (const char*) key1.pointer;
   1.950 +    const char *p2 = (const char*) key2.pointer;
   1.951 +    if (p1 == p2) {
   1.952 +        return TRUE;
   1.953 +    }
   1.954 +    if (p1 == NULL || p2 == NULL) {
   1.955 +        return FALSE;
   1.956 +    }
   1.957 +    while (*p1 != 0 && uprv_tolower(*p1) == uprv_tolower(*p2)) {
   1.958 +        ++p1;
   1.959 +        ++p2;
   1.960 +    }
   1.961 +    return (UBool)(*p1 == *p2);
   1.962 +}
   1.963 +
   1.964 +/********************************************************************
   1.965 + * PUBLIC int32_t Support Functions
   1.966 + ********************************************************************/
   1.967 +
   1.968 +U_CAPI int32_t U_EXPORT2
   1.969 +uhash_hashLong(const UHashTok key) {
   1.970 +    return key.integer;
   1.971 +}
   1.972 +
   1.973 +U_CAPI UBool U_EXPORT2
   1.974 +uhash_compareLong(const UHashTok key1, const UHashTok key2) {
   1.975 +    return (UBool)(key1.integer == key2.integer);
   1.976 +}

mercurial