intl/icu/source/i18n/sortkey.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /*
     2 *******************************************************************************
     3 * Copyright (C) 1996-2012, International Business Machines Corporation and
     4 * others. All Rights Reserved.
     5 *******************************************************************************
     6 */
     7 //===============================================================================
     8 //
     9 // File sortkey.cpp
    10 //
    11 //
    12 //
    13 // Created by: Helena Shih
    14 //
    15 // Modification History:
    16 //
    17 //  Date         Name          Description
    18 //
    19 //  6/20/97      helena        Java class name change.
    20 //  6/23/97      helena        Added comments to make code more readable.
    21 //  6/26/98      erm           Changed to use byte arrays instead of UnicodeString
    22 //  7/31/98      erm           hashCode: minimum inc should be 2 not 1,
    23 //                             Cleaned up operator=
    24 // 07/12/99      helena        HPUX 11 CC port.
    25 // 03/06/01      synwee        Modified compareTo, to handle the result of
    26 //                             2 string similar in contents, but one is longer
    27 //                             than the other
    28 //===============================================================================
    30 #include "unicode/utypes.h"
    32 #if !UCONFIG_NO_COLLATION
    34 #include "unicode/sortkey.h"
    35 #include "cmemory.h"
    36 #include "uelement.h"
    37 #include "ustr_imp.h"
    39 U_NAMESPACE_BEGIN
    41 // A hash code of kInvalidHashCode indicates that the hash code needs
    42 // to be computed. A hash code of kEmptyHashCode is used for empty keys
    43 // and for any key whose computed hash code is kInvalidHashCode.
    44 static const int32_t kInvalidHashCode = 0;
    45 static const int32_t kEmptyHashCode = 1;
    46 // The "bogus hash code" replaces a separate fBogus flag.
    47 static const int32_t kBogusHashCode = 2;
    49 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
    51 CollationKey::CollationKey()
    52     : UObject(), fFlagAndLength(0),
    53       fHashCode(kEmptyHashCode)
    54 {
    55 }
    57 // Create a collation key from a bit array.
    58 CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
    59     : UObject(), fFlagAndLength(count),
    60       fHashCode(kInvalidHashCode)
    61 {
    62     if (count < 0 || (newValues == NULL && count != 0) ||
    63             (count > getCapacity() && reallocate(count, 0) == NULL)) {
    64         setToBogus();
    65         return;
    66     }
    68     if (count > 0) {
    69         uprv_memcpy(getBytes(), newValues, count);
    70     }
    71 }
    73 CollationKey::CollationKey(const CollationKey& other)
    74     : UObject(other), fFlagAndLength(other.getLength()),
    75       fHashCode(other.fHashCode)
    76 {
    77     if (other.isBogus())
    78     {
    79         setToBogus();
    80         return;
    81     }
    83     int32_t length = fFlagAndLength;
    84     if (length > getCapacity() && reallocate(length, 0) == NULL) {
    85         setToBogus();
    86         return;
    87     }
    89     if (length > 0) {
    90         uprv_memcpy(getBytes(), other.getBytes(), length);
    91     }
    92 }
    94 CollationKey::~CollationKey()
    95 {
    96     if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
    97 }
    99 uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) {
   100     uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity));
   101     if(newBytes == NULL) { return NULL; }
   102     if(length > 0) {
   103         uprv_memcpy(newBytes, getBytes(), length);
   104     }
   105     if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
   106     fUnion.fFields.fBytes = newBytes;
   107     fUnion.fFields.fCapacity = newCapacity;
   108     fFlagAndLength |= 0x80000000;
   109     return newBytes;
   110 }
   112 void CollationKey::setLength(int32_t newLength) {
   113     // U_ASSERT(newLength >= 0 && newLength <= getCapacity());
   114     fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength;
   115     fHashCode = kInvalidHashCode;
   116 }
   118 // set the key to an empty state
   119 CollationKey&
   120 CollationKey::reset()
   121 {
   122     fFlagAndLength &= 0x80000000;
   123     fHashCode = kEmptyHashCode;
   125     return *this;
   126 }
   128 // set the key to a "bogus" or invalid state
   129 CollationKey&
   130 CollationKey::setToBogus()
   131 {
   132     fFlagAndLength &= 0x80000000;
   133     fHashCode = kBogusHashCode;
   135     return *this;
   136 }
   138 UBool
   139 CollationKey::operator==(const CollationKey& source) const
   140 {
   141     return getLength() == source.getLength() &&
   142             (this == &source ||
   143              uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0);
   144 }
   146 const CollationKey&
   147 CollationKey::operator=(const CollationKey& other)
   148 {
   149     if (this != &other)
   150     {
   151         if (other.isBogus())
   152         {
   153             return setToBogus();
   154         }
   156         int32_t length = other.getLength();
   157         if (length > getCapacity() && reallocate(length, 0) == NULL) {
   158             return setToBogus();
   159         }
   160         if (length > 0) {
   161             uprv_memcpy(getBytes(), other.getBytes(), length);
   162         }
   163         fFlagAndLength = (fFlagAndLength & 0x80000000) | length;
   164         fHashCode = other.fHashCode;
   165     }
   167     return *this;
   168 }
   170 // Bitwise comparison for the collation keys.
   171 Collator::EComparisonResult
   172 CollationKey::compareTo(const CollationKey& target) const
   173 {
   174     UErrorCode errorCode = U_ZERO_ERROR;
   175     return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode));
   176 }
   178 // Bitwise comparison for the collation keys.
   179 UCollationResult
   180 CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
   181 {
   182   if(U_SUCCESS(status)) {
   183     const uint8_t *src = getBytes();
   184     const uint8_t *tgt = target.getBytes();
   186     // are we comparing the same string
   187     if (src == tgt)
   188         return  UCOL_EQUAL;
   190     UCollationResult result;
   192     // are we comparing different lengths?
   193     int32_t minLength = getLength();
   194     int32_t targetLength = target.getLength();
   195     if (minLength < targetLength) {
   196         result = UCOL_LESS;
   197     } else if (minLength == targetLength) {
   198         result = UCOL_EQUAL;
   199     } else {
   200         minLength = targetLength;
   201         result = UCOL_GREATER;
   202     }
   204     if (minLength > 0) {
   205         int diff = uprv_memcmp(src, tgt, minLength);
   206         if (diff > 0) {
   207             return UCOL_GREATER;
   208         }
   209         else
   210             if (diff < 0) {
   211                 return UCOL_LESS;
   212             }
   213     }
   215     return result;
   216   } else {
   217     return UCOL_EQUAL;
   218   }
   219 }
   221 #ifdef U_USE_COLLATION_KEY_DEPRECATES
   222 // Create a copy of the byte array.
   223 uint8_t*
   224 CollationKey::toByteArray(int32_t& count) const
   225 {
   226     uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
   228     if (result == NULL)
   229     {
   230         count = 0;
   231     }
   232     else
   233     {
   234         count = fCount;
   235         if (count > 0) {
   236             uprv_memcpy(result, fBytes, fCount);
   237         }
   238     }
   240     return result;
   241 }
   242 #endif
   244 static int32_t
   245 computeHashCode(const uint8_t *key, int32_t  length) {
   246     const char *s = reinterpret_cast<const char *>(key);
   247     int32_t hash;
   248     if (s == NULL || length == 0) {
   249         hash = kEmptyHashCode;
   250     } else {
   251         hash = ustr_hashCharsN(s, length);
   252         if (hash == kInvalidHashCode || hash == kBogusHashCode) {
   253             hash = kEmptyHashCode;
   254         }
   255     }
   256     return hash;
   257 }
   259 int32_t
   260 CollationKey::hashCode() const
   261 {
   262     // (Cribbed from UnicodeString)
   263     // We cache the hashCode; when it becomes invalid, due to any change to the
   264     // string, we note this by setting it to kInvalidHashCode. [LIU]
   266     // Note: This method is semantically const, but physically non-const.
   268     if (fHashCode == kInvalidHashCode)
   269     {
   270         fHashCode = computeHashCode(getBytes(), getLength());
   271     }
   273     return fHashCode;
   274 }
   276 U_NAMESPACE_END
   278 U_CAPI int32_t U_EXPORT2
   279 ucol_keyHashCode(const uint8_t *key, 
   280                        int32_t  length)
   281 {
   282     return icu::computeHashCode(key, length);
   283 }
   285 #endif /* #if !UCONFIG_NO_COLLATION */

mercurial