intl/icu/source/i18n/sortkey.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 * Copyright (C) 1996-2012, International Business Machines Corporation and
michael@0 4 * others. All Rights Reserved.
michael@0 5 *******************************************************************************
michael@0 6 */
michael@0 7 //===============================================================================
michael@0 8 //
michael@0 9 // File sortkey.cpp
michael@0 10 //
michael@0 11 //
michael@0 12 //
michael@0 13 // Created by: Helena Shih
michael@0 14 //
michael@0 15 // Modification History:
michael@0 16 //
michael@0 17 // Date Name Description
michael@0 18 //
michael@0 19 // 6/20/97 helena Java class name change.
michael@0 20 // 6/23/97 helena Added comments to make code more readable.
michael@0 21 // 6/26/98 erm Changed to use byte arrays instead of UnicodeString
michael@0 22 // 7/31/98 erm hashCode: minimum inc should be 2 not 1,
michael@0 23 // Cleaned up operator=
michael@0 24 // 07/12/99 helena HPUX 11 CC port.
michael@0 25 // 03/06/01 synwee Modified compareTo to handle the case of
michael@0 26 // two strings that are similar in content, but where one
michael@0 27 // is longer than the other
michael@0 28 //===============================================================================
michael@0 29
michael@0 30 #include "unicode/utypes.h"
michael@0 31
michael@0 32 #if !UCONFIG_NO_COLLATION
michael@0 33
michael@0 34 #include "unicode/sortkey.h"
michael@0 35 #include "cmemory.h"
michael@0 36 #include "uelement.h"
michael@0 37 #include "ustr_imp.h"
michael@0 38
michael@0 39 U_NAMESPACE_BEGIN
michael@0 40
// Reserved values of the cached hash code (fHashCode). computeHashCode()
// never returns kInvalidHashCode or kBogusHashCode for real key bytes; it
// substitutes kEmptyHashCode for both.
michael@0 41 // A hash code of kInvalidHashCode indicates that the hash code needs
michael@0 42 // to be computed. A hash code of kEmptyHashCode is used for empty keys
michael@0 43 // and for any key whose computed hash code is kInvalidHashCode or kBogusHashCode.
michael@0 44 static const int32_t kInvalidHashCode = 0;
michael@0 45 static const int32_t kEmptyHashCode = 1;
michael@0 46 // The "bogus hash code" replaces a separate fBogus flag.
// setToBogus() stores this value in fHashCode.
michael@0 47 static const int32_t kBogusHashCode = 2;
michael@0 48
// NOTE(review): macro defined outside this file; presumably supplies the
// class-ID (RTTI) boilerplate for CollationKey - confirm against its header.
michael@0 49 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
michael@0 50
michael@0 51 CollationKey::CollationKey()
michael@0 52 : UObject(), fFlagAndLength(0),
michael@0 53 fHashCode(kEmptyHashCode)
michael@0 54 {
michael@0 55 }
michael@0 56
michael@0 57 // Create a collation key from a bit array.
michael@0 58 CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
michael@0 59 : UObject(), fFlagAndLength(count),
michael@0 60 fHashCode(kInvalidHashCode)
michael@0 61 {
michael@0 62 if (count < 0 || (newValues == NULL && count != 0) ||
michael@0 63 (count > getCapacity() && reallocate(count, 0) == NULL)) {
michael@0 64 setToBogus();
michael@0 65 return;
michael@0 66 }
michael@0 67
michael@0 68 if (count > 0) {
michael@0 69 uprv_memcpy(getBytes(), newValues, count);
michael@0 70 }
michael@0 71 }
michael@0 72
michael@0 73 CollationKey::CollationKey(const CollationKey& other)
michael@0 74 : UObject(other), fFlagAndLength(other.getLength()),
michael@0 75 fHashCode(other.fHashCode)
michael@0 76 {
michael@0 77 if (other.isBogus())
michael@0 78 {
michael@0 79 setToBogus();
michael@0 80 return;
michael@0 81 }
michael@0 82
michael@0 83 int32_t length = fFlagAndLength;
michael@0 84 if (length > getCapacity() && reallocate(length, 0) == NULL) {
michael@0 85 setToBogus();
michael@0 86 return;
michael@0 87 }
michael@0 88
michael@0 89 if (length > 0) {
michael@0 90 uprv_memcpy(getBytes(), other.getBytes(), length);
michael@0 91 }
michael@0 92 }
michael@0 93
michael@0 94 CollationKey::~CollationKey()
michael@0 95 {
michael@0 96 if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
michael@0 97 }
michael@0 98
michael@0 99 uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) {
michael@0 100 uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity));
michael@0 101 if(newBytes == NULL) { return NULL; }
michael@0 102 if(length > 0) {
michael@0 103 uprv_memcpy(newBytes, getBytes(), length);
michael@0 104 }
michael@0 105 if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
michael@0 106 fUnion.fFields.fBytes = newBytes;
michael@0 107 fUnion.fFields.fCapacity = newCapacity;
michael@0 108 fFlagAndLength |= 0x80000000;
michael@0 109 return newBytes;
michael@0 110 }
michael@0 111
michael@0 112 void CollationKey::setLength(int32_t newLength) {
michael@0 113 // U_ASSERT(newLength >= 0 && newLength <= getCapacity());
michael@0 114 fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength;
michael@0 115 fHashCode = kInvalidHashCode;
michael@0 116 }
michael@0 117
michael@0 118 // set the key to an empty state
michael@0 119 CollationKey&
michael@0 120 CollationKey::reset()
michael@0 121 {
michael@0 122 fFlagAndLength &= 0x80000000;
michael@0 123 fHashCode = kEmptyHashCode;
michael@0 124
michael@0 125 return *this;
michael@0 126 }
michael@0 127
michael@0 128 // set the key to a "bogus" or invalid state
michael@0 129 CollationKey&
michael@0 130 CollationKey::setToBogus()
michael@0 131 {
michael@0 132 fFlagAndLength &= 0x80000000;
michael@0 133 fHashCode = kBogusHashCode;
michael@0 134
michael@0 135 return *this;
michael@0 136 }
michael@0 137
michael@0 138 UBool
michael@0 139 CollationKey::operator==(const CollationKey& source) const
michael@0 140 {
michael@0 141 return getLength() == source.getLength() &&
michael@0 142 (this == &source ||
michael@0 143 uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0);
michael@0 144 }
michael@0 145
michael@0 146 const CollationKey&
michael@0 147 CollationKey::operator=(const CollationKey& other)
michael@0 148 {
michael@0 149 if (this != &other)
michael@0 150 {
michael@0 151 if (other.isBogus())
michael@0 152 {
michael@0 153 return setToBogus();
michael@0 154 }
michael@0 155
michael@0 156 int32_t length = other.getLength();
michael@0 157 if (length > getCapacity() && reallocate(length, 0) == NULL) {
michael@0 158 return setToBogus();
michael@0 159 }
michael@0 160 if (length > 0) {
michael@0 161 uprv_memcpy(getBytes(), other.getBytes(), length);
michael@0 162 }
michael@0 163 fFlagAndLength = (fFlagAndLength & 0x80000000) | length;
michael@0 164 fHashCode = other.fHashCode;
michael@0 165 }
michael@0 166
michael@0 167 return *this;
michael@0 168 }
michael@0 169
michael@0 170 // Bitwise comparison for the collation keys.
michael@0 171 Collator::EComparisonResult
michael@0 172 CollationKey::compareTo(const CollationKey& target) const
michael@0 173 {
michael@0 174 UErrorCode errorCode = U_ZERO_ERROR;
michael@0 175 return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode));
michael@0 176 }
michael@0 177
michael@0 178 // Bitwise comparison for the collation keys.
michael@0 179 UCollationResult
michael@0 180 CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
michael@0 181 {
michael@0 182 if(U_SUCCESS(status)) {
michael@0 183 const uint8_t *src = getBytes();
michael@0 184 const uint8_t *tgt = target.getBytes();
michael@0 185
michael@0 186 // are we comparing the same string
michael@0 187 if (src == tgt)
michael@0 188 return UCOL_EQUAL;
michael@0 189
michael@0 190 UCollationResult result;
michael@0 191
michael@0 192 // are we comparing different lengths?
michael@0 193 int32_t minLength = getLength();
michael@0 194 int32_t targetLength = target.getLength();
michael@0 195 if (minLength < targetLength) {
michael@0 196 result = UCOL_LESS;
michael@0 197 } else if (minLength == targetLength) {
michael@0 198 result = UCOL_EQUAL;
michael@0 199 } else {
michael@0 200 minLength = targetLength;
michael@0 201 result = UCOL_GREATER;
michael@0 202 }
michael@0 203
michael@0 204 if (minLength > 0) {
michael@0 205 int diff = uprv_memcmp(src, tgt, minLength);
michael@0 206 if (diff > 0) {
michael@0 207 return UCOL_GREATER;
michael@0 208 }
michael@0 209 else
michael@0 210 if (diff < 0) {
michael@0 211 return UCOL_LESS;
michael@0 212 }
michael@0 213 }
michael@0 214
michael@0 215 return result;
michael@0 216 } else {
michael@0 217 return UCOL_EQUAL;
michael@0 218 }
michael@0 219 }
michael@0 220
#ifdef U_USE_COLLATION_KEY_DEPRECATES
// Create a copy of the byte array.
//
// count: receives the number of bytes in the returned array, or 0 when the
//        allocation fails.
//
// Returns a buffer obtained from uprv_malloc() holding a copy of the key
// bytes, or NULL when the allocation fails.
// NOTE(review): presumably the caller releases the buffer with uprv_free() -
// confirm against the declaration in the header.
//
// Uses getLength()/getBytes() like every other method in this file; the
// former fCount/fBytes members are not used anywhere else here.
uint8_t*
CollationKey::toByteArray(int32_t& count) const
{
    const int32_t length = getLength();
    uint8_t *result = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * length));

    if (result == NULL)
    {
        count = 0;
    }
    else
    {
        count = length;
        if (count > 0) {
            uprv_memcpy(result, getBytes(), length);
        }
    }

    return result;
}
#endif
michael@0 243
michael@0 244 static int32_t
michael@0 245 computeHashCode(const uint8_t *key, int32_t length) {
michael@0 246 const char *s = reinterpret_cast<const char *>(key);
michael@0 247 int32_t hash;
michael@0 248 if (s == NULL || length == 0) {
michael@0 249 hash = kEmptyHashCode;
michael@0 250 } else {
michael@0 251 hash = ustr_hashCharsN(s, length);
michael@0 252 if (hash == kInvalidHashCode || hash == kBogusHashCode) {
michael@0 253 hash = kEmptyHashCode;
michael@0 254 }
michael@0 255 }
michael@0 256 return hash;
michael@0 257 }
michael@0 258
michael@0 259 int32_t
michael@0 260 CollationKey::hashCode() const
michael@0 261 {
michael@0 262 // (Cribbed from UnicodeString)
michael@0 263 // We cache the hashCode; when it becomes invalid, due to any change to the
michael@0 264 // string, we note this by setting it to kInvalidHashCode. [LIU]
michael@0 265
michael@0 266 // Note: This method is semantically const, but physically non-const.
michael@0 267
michael@0 268 if (fHashCode == kInvalidHashCode)
michael@0 269 {
michael@0 270 fHashCode = computeHashCode(getBytes(), getLength());
michael@0 271 }
michael@0 272
michael@0 273 return fHashCode;
michael@0 274 }
michael@0 275
michael@0 276 U_NAMESPACE_END
michael@0 277
michael@0 278 U_CAPI int32_t U_EXPORT2
michael@0 279 ucol_keyHashCode(const uint8_t *key,
michael@0 280 int32_t length)
michael@0 281 {
michael@0 282 return icu::computeHashCode(key, length);
michael@0 283 }
michael@0 284
michael@0 285 #endif /* #if !UCONFIG_NO_COLLATION */

mercurial