intl/icu/source/common/uhash.c

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /*
     2 ******************************************************************************
     3 *   Copyright (C) 1997-2011, International Business Machines
     4 *   Corporation and others.  All Rights Reserved.
     5 ******************************************************************************
     6 *   Date        Name        Description
     7 *   03/22/00    aliu        Adapted from original C++ ICU Hashtable.
     8 *   07/06/01    aliu        Modified to support int32_t keys on
     9 *                           platforms with sizeof(void*) < 32.
    10 ******************************************************************************
    11 */
    13 #include "uhash.h"
    14 #include "unicode/ustring.h"
    15 #include "cstring.h"
    16 #include "cmemory.h"
    17 #include "uassert.h"
    18 #include "ustr_imp.h"
    20 /* This hashtable is implemented as a double hash.  All elements are
    21  * stored in a single array with no secondary storage for collision
    22  * resolution (no linked list, etc.).  When there is a hash collision
    23  * (when two unequal keys have the same hashcode) we resolve this by
    24  * using a secondary hash.  The secondary hash is an increment
    25  * computed as a hash function (a different one) of the primary
    26  * hashcode.  This increment is added to the initial hash value to
    27  * obtain further slots assigned to the same hash code.  For this to
    28  * work, the length of the array and the increment must be relatively
    29  * prime.  The easiest way to achieve this is to have the length of
    30  * the array be prime, and the increment be any value from
    31  * 1..length-1.
    32  *
    33  * Hashcodes are 32-bit integers.  We make sure all hashcodes are
    34  * non-negative by masking off the top bit.  This has two effects: (1)
    35  * modulo arithmetic is simplified.  If we allowed negative hashcodes,
    36  * then when we computed hashcode % length, we could get a negative
    37  * result, which we would then have to adjust back into range.  It's
    38  * simpler to just make hashcodes non-negative. (2) It makes it easy
    39  * to check for empty vs. occupied slots in the table.  We just mark
    40  * empty or deleted slots with a negative hashcode.
    41  *
    42  * The central function is _uhash_find().  This function looks for a
    43  * slot matching the given key and hashcode.  If one is found, it
    44  * returns a pointer to that slot.  If the table is full, and no match
    45  * is found, it returns NULL -- in theory.  This would make the code
    46  * more complicated, since all callers of _uhash_find() would then
    47  * have to check for a NULL result.  To keep this from happening, we
    48  * don't allow the table to fill.  When there is only one
    49  * empty/deleted slot left, uhash_put() will refuse to increase the
    50  * count, and fail.  This simplifies the code.  In practice, one will
    51  * seldom encounter this using default UHashtables.  However, if a
    52  * hashtable is set to a U_FIXED resize policy, or if memory is
    53  * exhausted, then the table may fill.
    54  *
    55  * High and low water ratios control rehashing.  They establish levels
    56  * of fullness (from 0 to 1) outside of which the data array is
    57  * reallocated and repopulated.  Setting the low water ratio to zero
    58  * means the table will never shrink.  Setting the high water ratio to
    59  * one means the table will never grow.  The ratios should be
    60  * coordinated with the ratio between successive elements of the
    61  * PRIMES table, so that when the primeIndex is incremented or
    62  * decremented during rehashing, it brings the ratio of count / length
    63  * back into the desired range (between low and high water ratios).
    64  */
    66 /********************************************************************
    67  * PRIVATE Constants, Macros
    68  ********************************************************************/
    70 /* This is a list of non-consecutive primes chosen such that
    71  * PRIMES[i+1] ~ 2*PRIMES[i].  (Currently, the ratio ranges from 1.81
    72  * to 2.18; the inverse ratio ranges from 0.459 to 0.552.)  If this
    73  * ratio is changed, the low and high water ratios should also be
    74  * adjusted to suit.
    75  *
    76  * These prime numbers were also chosen so that they are the largest
    77  * prime number while being less than a power of two.
    78  */
    79 static const int32_t PRIMES[] = {
    80     13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
    81     65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593,
    82     16777213, 33554393, 67108859, 134217689, 268435399, 536870909,
    83     1073741789, 2147483647 /*, 4294967291 */
    84 };
    86 #define PRIMES_LENGTH (sizeof(PRIMES) / sizeof(PRIMES[0]))
    87 #define DEFAULT_PRIME_INDEX 3
    89 /* These ratios are tuned to the PRIMES array such that a resize
    90  * places the table back into the zone of non-resizing.  That is,
    91  * after a call to _uhash_rehash(), a subsequent call to
    92  * _uhash_rehash() should do nothing (should not churn).  This is only
    93  * a potential problem with U_GROW_AND_SHRINK.
    94  */
    95 static const float RESIZE_POLICY_RATIO_TABLE[6] = {
    96     /* low, high water ratio */
    97     0.0F, 0.5F, /* U_GROW: Grow on demand, do not shrink */
    98     0.1F, 0.5F, /* U_GROW_AND_SHRINK: Grow and shrink on demand */
    99     0.0F, 1.0F  /* U_FIXED: Never change size */
   100 };
   102 /*
   103   Invariants for hashcode values:
   105   * DELETED < 0
   106   * EMPTY < 0
   107   * Real hashes >= 0
   109   Hashcodes may not start out this way, but internally they are
   110   adjusted so that they are always positive.  We assume 32-bit
   111   hashcodes; adjust these constants for other hashcode sizes.
   112 */
   113 #define HASH_DELETED    ((int32_t) 0x80000000)
   114 #define HASH_EMPTY      ((int32_t) HASH_DELETED + 1)
   116 #define IS_EMPTY_OR_DELETED(x) ((x) < 0)
   118 /* This macro expects a UHashTok.pointer as its keypointer and
   119    valuepointer parameters */
   120 #define HASH_DELETE_KEY_VALUE(hash, keypointer, valuepointer) \
   121             if (hash->keyDeleter != NULL && keypointer != NULL) { \
   122                 (*hash->keyDeleter)(keypointer); \
   123             } \
   124             if (hash->valueDeleter != NULL && valuepointer != NULL) { \
   125                 (*hash->valueDeleter)(valuepointer); \
   126             }
   128 /*
   129  * Constants for hinting whether a key or value is an integer
   130  * or a pointer.  If a hint bit is zero, then the associated
   131  * token is assumed to be an integer.
   132  */
   133 #define HINT_KEY_POINTER   (1)
   134 #define HINT_VALUE_POINTER (2)
   136 /********************************************************************
   137  * PRIVATE Implementation
   138  ********************************************************************/
   140 static UHashTok
   141 _uhash_setElement(UHashtable *hash, UHashElement* e,
   142                   int32_t hashcode,
   143                   UHashTok key, UHashTok value, int8_t hint) {
   145     UHashTok oldValue = e->value;
   146     if (hash->keyDeleter != NULL && e->key.pointer != NULL &&
   147         e->key.pointer != key.pointer) { /* Avoid double deletion */
   148         (*hash->keyDeleter)(e->key.pointer);
   149     }
   150     if (hash->valueDeleter != NULL) {
   151         if (oldValue.pointer != NULL &&
   152             oldValue.pointer != value.pointer) { /* Avoid double deletion */
   153             (*hash->valueDeleter)(oldValue.pointer);
   154         }
   155         oldValue.pointer = NULL;
   156     }
   157     /* Compilers should copy the UHashTok union correctly, but even if
   158      * they do, memory heap tools (e.g. BoundsChecker) can get
   159      * confused when a pointer is cloaked in a union and then copied.
   160      * TO ALLEVIATE THIS, we use hints (based on what API the user is
   161      * calling) to copy pointers when we know the user thinks
   162      * something is a pointer. */
   163     if (hint & HINT_KEY_POINTER) {
   164         e->key.pointer = key.pointer;
   165     } else {
   166         e->key = key;
   167     }
   168     if (hint & HINT_VALUE_POINTER) {
   169         e->value.pointer = value.pointer;
   170     } else {
   171         e->value = value;
   172     }
   173     e->hashcode = hashcode;
   174     return oldValue;
   175 }
   177 /**
   178  * Assumes that the given element is not empty or deleted.
   179  */
   180 static UHashTok
   181 _uhash_internalRemoveElement(UHashtable *hash, UHashElement* e) {
   182     UHashTok empty;
   183     U_ASSERT(!IS_EMPTY_OR_DELETED(e->hashcode));
   184     --hash->count;
   185     empty.pointer = NULL; empty.integer = 0;
   186     return _uhash_setElement(hash, e, HASH_DELETED, empty, empty, 0);
   187 }
   189 static void
   190 _uhash_internalSetResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
   191     U_ASSERT(hash != NULL);
   192     U_ASSERT(((int32_t)policy) >= 0);
   193     U_ASSERT(((int32_t)policy) < 3);
   194     hash->lowWaterRatio  = RESIZE_POLICY_RATIO_TABLE[policy * 2];
   195     hash->highWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2 + 1];
   196 }
   198 /**
   199  * Allocate internal data array of a size determined by the given
   200  * prime index.  If the index is out of range it is pinned into range.
   201  * If the allocation fails the status is set to
   202  * U_MEMORY_ALLOCATION_ERROR and all array storage is freed.  In
   203  * either case the previous array pointer is overwritten.
   204  *
   205  * Caller must ensure primeIndex is in range 0..PRIME_LENGTH-1.
   206  */
   207 static void
   208 _uhash_allocate(UHashtable *hash,
   209                 int32_t primeIndex,
   210                 UErrorCode *status) {
   212     UHashElement *p, *limit;
   213     UHashTok emptytok;
   215     if (U_FAILURE(*status)) return;
   217     U_ASSERT(primeIndex >= 0 && primeIndex < PRIMES_LENGTH);
   219     hash->primeIndex = primeIndex;
   220     hash->length = PRIMES[primeIndex];
   222     p = hash->elements = (UHashElement*)
   223         uprv_malloc(sizeof(UHashElement) * hash->length);
   225     if (hash->elements == NULL) {
   226         *status = U_MEMORY_ALLOCATION_ERROR;
   227         return;
   228     }
   230     emptytok.pointer = NULL; /* Only one of these two is needed */
   231     emptytok.integer = 0;    /* but we don't know which one. */
   233     limit = p + hash->length;
   234     while (p < limit) {
   235         p->key = emptytok;
   236         p->value = emptytok;
   237         p->hashcode = HASH_EMPTY;
   238         ++p;
   239     }
   241     hash->count = 0;
   242     hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio);
   243     hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
   244 }
   246 static UHashtable*
   247 _uhash_init(UHashtable *result,
   248               UHashFunction *keyHash, 
   249               UKeyComparator *keyComp,
   250               UValueComparator *valueComp,
   251               int32_t primeIndex,
   252               UErrorCode *status)
   253 {
   254     if (U_FAILURE(*status)) return NULL;
   255     U_ASSERT(keyHash != NULL);
   256     U_ASSERT(keyComp != NULL);
   258     result->keyHasher       = keyHash;
   259     result->keyComparator   = keyComp;
   260     result->valueComparator = valueComp;
   261     result->keyDeleter      = NULL;
   262     result->valueDeleter    = NULL;
   263     result->allocated       = FALSE;
   264     _uhash_internalSetResizePolicy(result, U_GROW);
   266     _uhash_allocate(result, primeIndex, status);
   268     if (U_FAILURE(*status)) {
   269         return NULL;
   270     }
   272     return result;
   273 }
   275 static UHashtable*
   276 _uhash_create(UHashFunction *keyHash, 
   277               UKeyComparator *keyComp,
   278               UValueComparator *valueComp,
   279               int32_t primeIndex,
   280               UErrorCode *status) {
   281     UHashtable *result;
   283     if (U_FAILURE(*status)) return NULL;
   285     result = (UHashtable*) uprv_malloc(sizeof(UHashtable));
   286     if (result == NULL) {
   287         *status = U_MEMORY_ALLOCATION_ERROR;
   288         return NULL;
   289     }
   291     _uhash_init(result, keyHash, keyComp, valueComp, primeIndex, status);
   292     result->allocated       = TRUE;
   294     if (U_FAILURE(*status)) {
   295         uprv_free(result);
   296         return NULL;
   297     }
   299     return result;
   300 }
   302 /**
   303  * Look for a key in the table, or if no such key exists, the first
   304  * empty slot matching the given hashcode.  Keys are compared using
   305  * the keyComparator function.
   306  *
   307  * First find the start position, which is the hashcode modulo
   308  * the length.  Test it to see if it is:
   309  *
   310  * a. identical:  First check the hash values for a quick check,
   311  *    then compare keys for equality using keyComparator.
   312  * b. deleted
   313  * c. empty
   314  *
   315  * Stop if it is identical or empty, otherwise continue by adding a
   316  * "jump" value (moduloing by the length again to keep it within
   317  * range) and retesting.  For efficiency, there need enough empty
   318  * values so that the searchs stop within a reasonable amount of time.
   319  * This can be changed by changing the high/low water marks.
   320  *
   321  * In theory, this function can return NULL, if it is full (no empty
   322  * or deleted slots) and if no matching key is found.  In practice, we
   323  * prevent this elsewhere (in uhash_put) by making sure the last slot
   324  * in the table is never filled.
   325  *
   326  * The size of the table should be prime for this algorithm to work;
   327  * otherwise we are not guaranteed that the jump value (the secondary
   328  * hash) is relatively prime to the table length.
   329  */
   330 static UHashElement*
   331 _uhash_find(const UHashtable *hash, UHashTok key,
   332             int32_t hashcode) {
   334     int32_t firstDeleted = -1;  /* assume invalid index */
   335     int32_t theIndex, startIndex;
   336     int32_t jump = 0; /* lazy evaluate */
   337     int32_t tableHash;
   338     UHashElement *elements = hash->elements;
   340     hashcode &= 0x7FFFFFFF; /* must be positive */
   341     startIndex = theIndex = (hashcode ^ 0x4000000) % hash->length;
   343     do {
   344         tableHash = elements[theIndex].hashcode;
   345         if (tableHash == hashcode) {          /* quick check */
   346             if ((*hash->keyComparator)(key, elements[theIndex].key)) {
   347                 return &(elements[theIndex]);
   348             }
   349         } else if (!IS_EMPTY_OR_DELETED(tableHash)) {
   350             /* We have hit a slot which contains a key-value pair,
   351              * but for which the hash code does not match.  Keep
   352              * looking.
   353              */
   354         } else if (tableHash == HASH_EMPTY) { /* empty, end o' the line */
   355             break;
   356         } else if (firstDeleted < 0) { /* remember first deleted */
   357             firstDeleted = theIndex;
   358         }
   359         if (jump == 0) { /* lazy compute jump */
   360             /* The jump value must be relatively prime to the table
   361              * length.  As long as the length is prime, then any value
   362              * 1..length-1 will be relatively prime to it.
   363              */
   364             jump = (hashcode % (hash->length - 1)) + 1;
   365         }
   366         theIndex = (theIndex + jump) % hash->length;
   367     } while (theIndex != startIndex);
   369     if (firstDeleted >= 0) {
   370         theIndex = firstDeleted; /* reset if had deleted slot */
   371     } else if (tableHash != HASH_EMPTY) {
   372         /* We get to this point if the hashtable is full (no empty or
   373          * deleted slots), and we've failed to find a match.  THIS
   374          * WILL NEVER HAPPEN as long as uhash_put() makes sure that
   375          * count is always < length.
   376          */
   377         U_ASSERT(FALSE);
   378         return NULL; /* Never happens if uhash_put() behaves */
   379     }
   380     return &(elements[theIndex]);
   381 }
   383 /**
   384  * Attempt to grow or shrink the data arrays in order to make the
   385  * count fit between the high and low water marks.  hash_put() and
   386  * hash_remove() call this method when the count exceeds the high or
   387  * low water marks.  This method may do nothing, if memory allocation
   388  * fails, or if the count is already in range, or if the length is
   389  * already at the low or high limit.  In any case, upon return the
   390  * arrays will be valid.
   391  */
   392 static void
   393 _uhash_rehash(UHashtable *hash, UErrorCode *status) {
   395     UHashElement *old = hash->elements;
   396     int32_t oldLength = hash->length;
   397     int32_t newPrimeIndex = hash->primeIndex;
   398     int32_t i;
   400     if (hash->count > hash->highWaterMark) {
   401         if (++newPrimeIndex >= PRIMES_LENGTH) {
   402             return;
   403         }
   404     } else if (hash->count < hash->lowWaterMark) {
   405         if (--newPrimeIndex < 0) {
   406             return;
   407         }
   408     } else {
   409         return;
   410     }
   412     _uhash_allocate(hash, newPrimeIndex, status);
   414     if (U_FAILURE(*status)) {
   415         hash->elements = old;
   416         hash->length = oldLength;       
   417         return;
   418     }
   420     for (i = oldLength - 1; i >= 0; --i) {
   421         if (!IS_EMPTY_OR_DELETED(old[i].hashcode)) {
   422             UHashElement *e = _uhash_find(hash, old[i].key, old[i].hashcode);
   423             U_ASSERT(e != NULL);
   424             U_ASSERT(e->hashcode == HASH_EMPTY);
   425             e->key = old[i].key;
   426             e->value = old[i].value;
   427             e->hashcode = old[i].hashcode;
   428             ++hash->count;
   429         }
   430     }
   432     uprv_free(old);
   433 }
   435 static UHashTok
   436 _uhash_remove(UHashtable *hash,
   437               UHashTok key) {
   438     /* First find the position of the key in the table.  If the object
   439      * has not been removed already, remove it.  If the user wanted
   440      * keys deleted, then delete it also.  We have to put a special
   441      * hashcode in that position that means that something has been
   442      * deleted, since when we do a find, we have to continue PAST any
   443      * deleted values.
   444      */
   445     UHashTok result;
   446     UHashElement* e = _uhash_find(hash, key, hash->keyHasher(key));
   447     U_ASSERT(e != NULL);
   448     result.pointer = NULL;
   449     result.integer = 0;
   450     if (!IS_EMPTY_OR_DELETED(e->hashcode)) {
   451         result = _uhash_internalRemoveElement(hash, e);
   452         if (hash->count < hash->lowWaterMark) {
   453             UErrorCode status = U_ZERO_ERROR;
   454             _uhash_rehash(hash, &status);
   455         }
   456     }
   457     return result;
   458 }
   460 static UHashTok
   461 _uhash_put(UHashtable *hash,
   462            UHashTok key,
   463            UHashTok value,
   464            int8_t hint,
   465            UErrorCode *status) {
   467     /* Put finds the position in the table for the new value.  If the
   468      * key is already in the table, it is deleted, if there is a
   469      * non-NULL keyDeleter.  Then the key, the hash and the value are
   470      * all put at the position in their respective arrays.
   471      */
   472     int32_t hashcode;
   473     UHashElement* e;
   474     UHashTok emptytok;
   476     if (U_FAILURE(*status)) {
   477         goto err;
   478     }
   479     U_ASSERT(hash != NULL);
   480     /* Cannot always check pointer here or iSeries sees NULL every time. */
   481     if ((hint & HINT_VALUE_POINTER) && value.pointer == NULL) {
   482         /* Disallow storage of NULL values, since NULL is returned by
   483          * get() to indicate an absent key.  Storing NULL == removing.
   484          */
   485         return _uhash_remove(hash, key);
   486     }
   487     if (hash->count > hash->highWaterMark) {
   488         _uhash_rehash(hash, status);
   489         if (U_FAILURE(*status)) {
   490             goto err;
   491         }
   492     }
   494     hashcode = (*hash->keyHasher)(key);
   495     e = _uhash_find(hash, key, hashcode);
   496     U_ASSERT(e != NULL);
   498     if (IS_EMPTY_OR_DELETED(e->hashcode)) {
   499         /* Important: We must never actually fill the table up.  If we
   500          * do so, then _uhash_find() will return NULL, and we'll have
   501          * to check for NULL after every call to _uhash_find().  To
   502          * avoid this we make sure there is always at least one empty
   503          * or deleted slot in the table.  This only is a problem if we
   504          * are out of memory and rehash isn't working.
   505          */
   506         ++hash->count;
   507         if (hash->count == hash->length) {
   508             /* Don't allow count to reach length */
   509             --hash->count;
   510             *status = U_MEMORY_ALLOCATION_ERROR;
   511             goto err;
   512         }
   513     }
   515     /* We must in all cases handle storage properly.  If there was an
   516      * old key, then it must be deleted (if the deleter != NULL).
   517      * Make hashcodes stored in table positive.
   518      */
   519     return _uhash_setElement(hash, e, hashcode & 0x7FFFFFFF, key, value, hint);
   521  err:
   522     /* If the deleters are non-NULL, this method adopts its key and/or
   523      * value arguments, and we must be sure to delete the key and/or
   524      * value in all cases, even upon failure.
   525      */
   526     HASH_DELETE_KEY_VALUE(hash, key.pointer, value.pointer);
   527     emptytok.pointer = NULL; emptytok.integer = 0;
   528     return emptytok;
   529 }
   532 /********************************************************************
   533  * PUBLIC API
   534  ********************************************************************/
   536 U_CAPI UHashtable* U_EXPORT2
   537 uhash_open(UHashFunction *keyHash, 
   538            UKeyComparator *keyComp,
   539            UValueComparator *valueComp,
   540            UErrorCode *status) {
   542     return _uhash_create(keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
   543 }
   545 U_CAPI UHashtable* U_EXPORT2
   546 uhash_openSize(UHashFunction *keyHash, 
   547                UKeyComparator *keyComp,
   548                UValueComparator *valueComp,
   549                int32_t size,
   550                UErrorCode *status) {
   552     /* Find the smallest index i for which PRIMES[i] >= size. */
   553     int32_t i = 0;
   554     while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) {
   555         ++i;
   556     }
   558     return _uhash_create(keyHash, keyComp, valueComp, i, status);
   559 }
   561 U_CAPI UHashtable* U_EXPORT2
   562 uhash_init(UHashtable *fillinResult,
   563            UHashFunction *keyHash, 
   564            UKeyComparator *keyComp,
   565            UValueComparator *valueComp,
   566            UErrorCode *status) {
   568     return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
   569 }
   571 U_CAPI void U_EXPORT2
   572 uhash_close(UHashtable *hash) {
   573     if (hash == NULL) {
   574         return;
   575     }
   576     if (hash->elements != NULL) {
   577         if (hash->keyDeleter != NULL || hash->valueDeleter != NULL) {
   578             int32_t pos=-1;
   579             UHashElement *e;
   580             while ((e = (UHashElement*) uhash_nextElement(hash, &pos)) != NULL) {
   581                 HASH_DELETE_KEY_VALUE(hash, e->key.pointer, e->value.pointer);
   582             }
   583         }
   584         uprv_free(hash->elements);
   585         hash->elements = NULL;
   586     }
   587     if (hash->allocated) {
   588         uprv_free(hash);
   589     }
   590 }
   592 U_CAPI UHashFunction *U_EXPORT2
   593 uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn) {
   594     UHashFunction *result = hash->keyHasher;
   595     hash->keyHasher = fn;
   596     return result;
   597 }
   599 U_CAPI UKeyComparator *U_EXPORT2
   600 uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) {
   601     UKeyComparator *result = hash->keyComparator;
   602     hash->keyComparator = fn;
   603     return result;
   604 }
   605 U_CAPI UValueComparator *U_EXPORT2 
   606 uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){
   607     UValueComparator *result = hash->valueComparator;
   608     hash->valueComparator = fn;
   609     return result;
   610 }
   612 U_CAPI UObjectDeleter *U_EXPORT2
   613 uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn) {
   614     UObjectDeleter *result = hash->keyDeleter;
   615     hash->keyDeleter = fn;
   616     return result;
   617 }
   619 U_CAPI UObjectDeleter *U_EXPORT2
   620 uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn) {
   621     UObjectDeleter *result = hash->valueDeleter;
   622     hash->valueDeleter = fn;
   623     return result;
   624 }
   626 U_CAPI void U_EXPORT2
   627 uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
   628     UErrorCode status = U_ZERO_ERROR;
   629     _uhash_internalSetResizePolicy(hash, policy);
   630     hash->lowWaterMark  = (int32_t)(hash->length * hash->lowWaterRatio);
   631     hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);    
   632     _uhash_rehash(hash, &status);
   633 }
   635 U_CAPI int32_t U_EXPORT2
   636 uhash_count(const UHashtable *hash) {
   637     return hash->count;
   638 }
   640 U_CAPI void* U_EXPORT2
   641 uhash_get(const UHashtable *hash,
   642           const void* key) {
   643     UHashTok keyholder;
   644     keyholder.pointer = (void*) key;
   645     return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer;
   646 }
   648 U_CAPI void* U_EXPORT2
   649 uhash_iget(const UHashtable *hash,
   650            int32_t key) {
   651     UHashTok keyholder;
   652     keyholder.integer = key;
   653     return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer;
   654 }
   656 U_CAPI int32_t U_EXPORT2
   657 uhash_geti(const UHashtable *hash,
   658            const void* key) {
   659     UHashTok keyholder;
   660     keyholder.pointer = (void*) key;
   661     return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
   662 }
   664 U_CAPI int32_t U_EXPORT2
   665 uhash_igeti(const UHashtable *hash,
   666            int32_t key) {
   667     UHashTok keyholder;
   668     keyholder.integer = key;
   669     return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
   670 }
   672 U_CAPI void* U_EXPORT2
   673 uhash_put(UHashtable *hash,
   674           void* key,
   675           void* value,
   676           UErrorCode *status) {
   677     UHashTok keyholder, valueholder;
   678     keyholder.pointer = key;
   679     valueholder.pointer = value;
   680     return _uhash_put(hash, keyholder, valueholder,
   681                       HINT_KEY_POINTER | HINT_VALUE_POINTER,
   682                       status).pointer;
   683 }
   685 U_CAPI void* U_EXPORT2
   686 uhash_iput(UHashtable *hash,
   687            int32_t key,
   688            void* value,
   689            UErrorCode *status) {
   690     UHashTok keyholder, valueholder;
   691     keyholder.integer = key;
   692     valueholder.pointer = value;
   693     return _uhash_put(hash, keyholder, valueholder,
   694                       HINT_VALUE_POINTER,
   695                       status).pointer;
   696 }
   698 U_CAPI int32_t U_EXPORT2
   699 uhash_puti(UHashtable *hash,
   700            void* key,
   701            int32_t value,
   702            UErrorCode *status) {
   703     UHashTok keyholder, valueholder;
   704     keyholder.pointer = key;
   705     valueholder.integer = value;
   706     return _uhash_put(hash, keyholder, valueholder,
   707                       HINT_KEY_POINTER,
   708                       status).integer;
   709 }
   712 U_CAPI int32_t U_EXPORT2
   713 uhash_iputi(UHashtable *hash,
   714            int32_t key,
   715            int32_t value,
   716            UErrorCode *status) {
   717     UHashTok keyholder, valueholder;
   718     keyholder.integer = key;
   719     valueholder.integer = value;
   720     return _uhash_put(hash, keyholder, valueholder,
   721                       0, /* neither is a ptr */
   722                       status).integer;
   723 }
   725 U_CAPI void* U_EXPORT2
   726 uhash_remove(UHashtable *hash,
   727              const void* key) {
   728     UHashTok keyholder;
   729     keyholder.pointer = (void*) key;
   730     return _uhash_remove(hash, keyholder).pointer;
   731 }
   733 U_CAPI void* U_EXPORT2
   734 uhash_iremove(UHashtable *hash,
   735               int32_t key) {
   736     UHashTok keyholder;
   737     keyholder.integer = key;
   738     return _uhash_remove(hash, keyholder).pointer;
   739 }
   741 U_CAPI int32_t U_EXPORT2
   742 uhash_removei(UHashtable *hash,
   743               const void* key) {
   744     UHashTok keyholder;
   745     keyholder.pointer = (void*) key;
   746     return _uhash_remove(hash, keyholder).integer;
   747 }
   749 U_CAPI int32_t U_EXPORT2
   750 uhash_iremovei(UHashtable *hash,
   751                int32_t key) {
   752     UHashTok keyholder;
   753     keyholder.integer = key;
   754     return _uhash_remove(hash, keyholder).integer;
   755 }
   757 U_CAPI void U_EXPORT2
   758 uhash_removeAll(UHashtable *hash) {
   759     int32_t pos = -1;
   760     const UHashElement *e;
   761     U_ASSERT(hash != NULL);
   762     if (hash->count != 0) {
   763         while ((e = uhash_nextElement(hash, &pos)) != NULL) {
   764             uhash_removeElement(hash, e);
   765         }
   766     }
   767     U_ASSERT(hash->count == 0);
   768 }
   770 U_CAPI const UHashElement* U_EXPORT2
   771 uhash_find(const UHashtable *hash, const void* key) {
   772     UHashTok keyholder;
   773     const UHashElement *e;
   774     keyholder.pointer = (void*) key;
   775     e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
   776     return IS_EMPTY_OR_DELETED(e->hashcode) ? NULL : e;
   777 }
   779 U_CAPI const UHashElement* U_EXPORT2
   780 uhash_nextElement(const UHashtable *hash, int32_t *pos) {
   781     /* Walk through the array until we find an element that is not
   782      * EMPTY and not DELETED.
   783      */
   784     int32_t i;
   785     U_ASSERT(hash != NULL);
   786     for (i = *pos + 1; i < hash->length; ++i) {
   787         if (!IS_EMPTY_OR_DELETED(hash->elements[i].hashcode)) {
   788             *pos = i;
   789             return &(hash->elements[i]);
   790         }
   791     }
   793     /* No more elements */
   794     return NULL;
   795 }
   797 U_CAPI void* U_EXPORT2
   798 uhash_removeElement(UHashtable *hash, const UHashElement* e) {
   799     U_ASSERT(hash != NULL);
   800     U_ASSERT(e != NULL);
   801     if (!IS_EMPTY_OR_DELETED(e->hashcode)) {
   802         UHashElement *nce = (UHashElement *)e;
   803         return _uhash_internalRemoveElement(hash, nce).pointer;
   804     }
   805     return NULL;
   806 }
   808 /********************************************************************
   809  * UHashTok convenience
   810  ********************************************************************/
   812 /**
   813  * Return a UHashTok for an integer.
   814  */
   815 /*U_CAPI UHashTok U_EXPORT2
   816 uhash_toki(int32_t i) {
   817     UHashTok tok;
   818     tok.integer = i;
   819     return tok;
   820 }*/
   822 /**
   823  * Return a UHashTok for a pointer.
   824  */
   825 /*U_CAPI UHashTok U_EXPORT2
   826 uhash_tokp(void* p) {
   827     UHashTok tok;
   828     tok.pointer = p;
   829     return tok;
   830 }*/
   832 /********************************************************************
   833  * PUBLIC Key Hash Functions
   834  ********************************************************************/
   836 U_CAPI int32_t U_EXPORT2
   837 uhash_hashUChars(const UHashTok key) {
   838     const UChar *s = (const UChar *)key.pointer;
   839     return s == NULL ? 0 : ustr_hashUCharsN(s, u_strlen(s));
   840 }
   842 U_CAPI int32_t U_EXPORT2
   843 uhash_hashChars(const UHashTok key) {
   844     const char *s = (const char *)key.pointer;
   845     return s == NULL ? 0 : ustr_hashCharsN(s, uprv_strlen(s));
   846 }
   848 U_CAPI int32_t U_EXPORT2
   849 uhash_hashIChars(const UHashTok key) {
   850     const char *s = (const char *)key.pointer;
   851     return s == NULL ? 0 : ustr_hashICharsN(s, uprv_strlen(s));
   852 }
   854 U_CAPI UBool U_EXPORT2 
   855 uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
   856     int32_t count1, count2, pos, i;
   858     if(hash1==hash2){
   859         return TRUE;
   860     }
   862     /*
   863      * Make sure that we are comparing 2 valid hashes of the same type
   864      * with valid comparison functions.
   865      * Without valid comparison functions, a binary comparison
   866      * of the hash values will yield random results on machines
   867      * with 64-bit pointers and 32-bit integer hashes.
   868      * A valueComparator is normally optional.
   869      */
   870     if (hash1==NULL || hash2==NULL ||
   871         hash1->keyComparator != hash2->keyComparator ||
   872         hash1->valueComparator != hash2->valueComparator ||
   873         hash1->valueComparator == NULL)
   874     {
   875         /*
   876         Normally we would return an error here about incompatible hash tables,
   877         but we return FALSE instead.
   878         */
   879         return FALSE;
   880     }
   882     count1 = uhash_count(hash1);
   883     count2 = uhash_count(hash2);
   884     if(count1!=count2){
   885         return FALSE;
   886     }
   888     pos=-1;
   889     for(i=0; i<count1; i++){
   890         const UHashElement* elem1 = uhash_nextElement(hash1, &pos);
   891         const UHashTok key1 = elem1->key;
   892         const UHashTok val1 = elem1->value;
   893         /* here the keys are not compared, instead the key form hash1 is used to fetch
   894          * value from hash2. If the hashes are equal then then both hashes should 
   895          * contain equal values for the same key!
   896          */
   897         const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1));
   898         const UHashTok val2 = elem2->value;
   899         if(hash1->valueComparator(val1, val2)==FALSE){
   900             return FALSE;
   901         }
   902     }
   903     return TRUE;
   904 }
   906 /********************************************************************
   907  * PUBLIC Comparator Functions
   908  ********************************************************************/
   910 U_CAPI UBool U_EXPORT2
   911 uhash_compareUChars(const UHashTok key1, const UHashTok key2) {
   912     const UChar *p1 = (const UChar*) key1.pointer;
   913     const UChar *p2 = (const UChar*) key2.pointer;
   914     if (p1 == p2) {
   915         return TRUE;
   916     }
   917     if (p1 == NULL || p2 == NULL) {
   918         return FALSE;
   919     }
   920     while (*p1 != 0 && *p1 == *p2) {
   921         ++p1;
   922         ++p2;
   923     }
   924     return (UBool)(*p1 == *p2);
   925 }
   927 U_CAPI UBool U_EXPORT2
   928 uhash_compareChars(const UHashTok key1, const UHashTok key2) {
   929     const char *p1 = (const char*) key1.pointer;
   930     const char *p2 = (const char*) key2.pointer;
   931     if (p1 == p2) {
   932         return TRUE;
   933     }
   934     if (p1 == NULL || p2 == NULL) {
   935         return FALSE;
   936     }
   937     while (*p1 != 0 && *p1 == *p2) {
   938         ++p1;
   939         ++p2;
   940     }
   941     return (UBool)(*p1 == *p2);
   942 }
   944 U_CAPI UBool U_EXPORT2
   945 uhash_compareIChars(const UHashTok key1, const UHashTok key2) {
   946     const char *p1 = (const char*) key1.pointer;
   947     const char *p2 = (const char*) key2.pointer;
   948     if (p1 == p2) {
   949         return TRUE;
   950     }
   951     if (p1 == NULL || p2 == NULL) {
   952         return FALSE;
   953     }
   954     while (*p1 != 0 && uprv_tolower(*p1) == uprv_tolower(*p2)) {
   955         ++p1;
   956         ++p2;
   957     }
   958     return (UBool)(*p1 == *p2);
   959 }
   961 /********************************************************************
   962  * PUBLIC int32_t Support Functions
   963  ********************************************************************/
   965 U_CAPI int32_t U_EXPORT2
   966 uhash_hashLong(const UHashTok key) {
   967     return key.integer;
   968 }
   970 U_CAPI UBool U_EXPORT2
   971 uhash_compareLong(const UHashTok key1, const UHashTok key2) {
   972     return (UBool)(key1.integer == key2.integer);
   973 }

mercurial