1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/sortkey.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,285 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* Copyright (C) 1996-2012, International Business Machines Corporation and 1.7 +* others. All Rights Reserved. 1.8 +******************************************************************************* 1.9 +*/ 1.10 +//=============================================================================== 1.11 +// 1.12 +// File sortkey.cpp 1.13 +// 1.14 +// 1.15 +// 1.16 +// Created by: Helena Shih 1.17 +// 1.18 +// Modification History: 1.19 +// 1.20 +// Date Name Description 1.21 +// 1.22 +// 6/20/97 helena Java class name change. 1.23 +// 6/23/97 helena Added comments to make code more readable. 1.24 +// 6/26/98 erm Changed to use byte arrays instead of UnicodeString 1.25 +// 7/31/98 erm hashCode: minimum inc should be 2 not 1, 1.26 +// Cleaned up operator= 1.27 +// 07/12/99 helena HPUX 11 CC port. 1.28 +// 03/06/01 synwee Modified compareTo, to handle the result of 1.29 +// 2 string similar in contents, but one is longer 1.30 +// than the other 1.31 +//=============================================================================== 1.32 + 1.33 +#include "unicode/utypes.h" 1.34 + 1.35 +#if !UCONFIG_NO_COLLATION 1.36 + 1.37 +#include "unicode/sortkey.h" 1.38 +#include "cmemory.h" 1.39 +#include "uelement.h" 1.40 +#include "ustr_imp.h" 1.41 + 1.42 +U_NAMESPACE_BEGIN 1.43 + 1.44 +// A hash code of kInvalidHashCode indicates that the hash code needs 1.45 +// to be computed. A hash code of kEmptyHashCode is used for empty keys 1.46 +// and for any key whose computed hash code is kInvalidHashCode. 1.47 +static const int32_t kInvalidHashCode = 0; 1.48 +static const int32_t kEmptyHashCode = 1; 1.49 +// The "bogus hash code" replaces a separate fBogus flag. 
1.50 +static const int32_t kBogusHashCode = 2; 1.51 + 1.52 +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey) 1.53 + 1.54 +CollationKey::CollationKey() 1.55 + : UObject(), fFlagAndLength(0), 1.56 + fHashCode(kEmptyHashCode) 1.57 +{ 1.58 +} 1.59 + 1.60 +// Create a collation key from a bit array. 1.61 +CollationKey::CollationKey(const uint8_t* newValues, int32_t count) 1.62 + : UObject(), fFlagAndLength(count), 1.63 + fHashCode(kInvalidHashCode) 1.64 +{ 1.65 + if (count < 0 || (newValues == NULL && count != 0) || 1.66 + (count > getCapacity() && reallocate(count, 0) == NULL)) { 1.67 + setToBogus(); 1.68 + return; 1.69 + } 1.70 + 1.71 + if (count > 0) { 1.72 + uprv_memcpy(getBytes(), newValues, count); 1.73 + } 1.74 +} 1.75 + 1.76 +CollationKey::CollationKey(const CollationKey& other) 1.77 + : UObject(other), fFlagAndLength(other.getLength()), 1.78 + fHashCode(other.fHashCode) 1.79 +{ 1.80 + if (other.isBogus()) 1.81 + { 1.82 + setToBogus(); 1.83 + return; 1.84 + } 1.85 + 1.86 + int32_t length = fFlagAndLength; 1.87 + if (length > getCapacity() && reallocate(length, 0) == NULL) { 1.88 + setToBogus(); 1.89 + return; 1.90 + } 1.91 + 1.92 + if (length > 0) { 1.93 + uprv_memcpy(getBytes(), other.getBytes(), length); 1.94 + } 1.95 +} 1.96 + 1.97 +CollationKey::~CollationKey() 1.98 +{ 1.99 + if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } 1.100 +} 1.101 + 1.102 +uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) { 1.103 + uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity)); 1.104 + if(newBytes == NULL) { return NULL; } 1.105 + if(length > 0) { 1.106 + uprv_memcpy(newBytes, getBytes(), length); 1.107 + } 1.108 + if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } 1.109 + fUnion.fFields.fBytes = newBytes; 1.110 + fUnion.fFields.fCapacity = newCapacity; 1.111 + fFlagAndLength |= 0x80000000; 1.112 + return newBytes; 1.113 +} 1.114 + 1.115 +void CollationKey::setLength(int32_t newLength) { 1.116 + // U_ASSERT(newLength 
>= 0 && newLength <= getCapacity()); 1.117 + fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength; 1.118 + fHashCode = kInvalidHashCode; 1.119 +} 1.120 + 1.121 +// set the key to an empty state 1.122 +CollationKey& 1.123 +CollationKey::reset() 1.124 +{ 1.125 + fFlagAndLength &= 0x80000000; 1.126 + fHashCode = kEmptyHashCode; 1.127 + 1.128 + return *this; 1.129 +} 1.130 + 1.131 +// set the key to a "bogus" or invalid state 1.132 +CollationKey& 1.133 +CollationKey::setToBogus() 1.134 +{ 1.135 + fFlagAndLength &= 0x80000000; 1.136 + fHashCode = kBogusHashCode; 1.137 + 1.138 + return *this; 1.139 +} 1.140 + 1.141 +UBool 1.142 +CollationKey::operator==(const CollationKey& source) const 1.143 +{ 1.144 + return getLength() == source.getLength() && 1.145 + (this == &source || 1.146 + uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0); 1.147 +} 1.148 + 1.149 +const CollationKey& 1.150 +CollationKey::operator=(const CollationKey& other) 1.151 +{ 1.152 + if (this != &other) 1.153 + { 1.154 + if (other.isBogus()) 1.155 + { 1.156 + return setToBogus(); 1.157 + } 1.158 + 1.159 + int32_t length = other.getLength(); 1.160 + if (length > getCapacity() && reallocate(length, 0) == NULL) { 1.161 + return setToBogus(); 1.162 + } 1.163 + if (length > 0) { 1.164 + uprv_memcpy(getBytes(), other.getBytes(), length); 1.165 + } 1.166 + fFlagAndLength = (fFlagAndLength & 0x80000000) | length; 1.167 + fHashCode = other.fHashCode; 1.168 + } 1.169 + 1.170 + return *this; 1.171 +} 1.172 + 1.173 +// Bitwise comparison for the collation keys. 1.174 +Collator::EComparisonResult 1.175 +CollationKey::compareTo(const CollationKey& target) const 1.176 +{ 1.177 + UErrorCode errorCode = U_ZERO_ERROR; 1.178 + return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode)); 1.179 +} 1.180 + 1.181 +// Bitwise comparison for the collation keys. 
1.182 +UCollationResult 1.183 +CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const 1.184 +{ 1.185 + if(U_SUCCESS(status)) { 1.186 + const uint8_t *src = getBytes(); 1.187 + const uint8_t *tgt = target.getBytes(); 1.188 + 1.189 + // are we comparing the same string 1.190 + if (src == tgt) 1.191 + return UCOL_EQUAL; 1.192 + 1.193 + UCollationResult result; 1.194 + 1.195 + // are we comparing different lengths? 1.196 + int32_t minLength = getLength(); 1.197 + int32_t targetLength = target.getLength(); 1.198 + if (minLength < targetLength) { 1.199 + result = UCOL_LESS; 1.200 + } else if (minLength == targetLength) { 1.201 + result = UCOL_EQUAL; 1.202 + } else { 1.203 + minLength = targetLength; 1.204 + result = UCOL_GREATER; 1.205 + } 1.206 + 1.207 + if (minLength > 0) { 1.208 + int diff = uprv_memcmp(src, tgt, minLength); 1.209 + if (diff > 0) { 1.210 + return UCOL_GREATER; 1.211 + } 1.212 + else 1.213 + if (diff < 0) { 1.214 + return UCOL_LESS; 1.215 + } 1.216 + } 1.217 + 1.218 + return result; 1.219 + } else { 1.220 + return UCOL_EQUAL; 1.221 + } 1.222 +} 1.223 + 1.224 +#ifdef U_USE_COLLATION_KEY_DEPRECATES 1.225 +// Create a copy of the byte array. 
1.226 +uint8_t* 1.227 +CollationKey::toByteArray(int32_t& count) const 1.228 +{ 1.229 + uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount ); 1.230 + 1.231 + if (result == NULL) 1.232 + { 1.233 + count = 0; 1.234 + } 1.235 + else 1.236 + { 1.237 + count = fCount; 1.238 + if (count > 0) { 1.239 + uprv_memcpy(result, fBytes, fCount); 1.240 + } 1.241 + } 1.242 + 1.243 + return result; 1.244 +} 1.245 +#endif 1.246 + 1.247 +static int32_t 1.248 +computeHashCode(const uint8_t *key, int32_t length) { 1.249 + const char *s = reinterpret_cast<const char *>(key); 1.250 + int32_t hash; 1.251 + if (s == NULL || length == 0) { 1.252 + hash = kEmptyHashCode; 1.253 + } else { 1.254 + hash = ustr_hashCharsN(s, length); 1.255 + if (hash == kInvalidHashCode || hash == kBogusHashCode) { 1.256 + hash = kEmptyHashCode; 1.257 + } 1.258 + } 1.259 + return hash; 1.260 +} 1.261 + 1.262 +int32_t 1.263 +CollationKey::hashCode() const 1.264 +{ 1.265 + // (Cribbed from UnicodeString) 1.266 + // We cache the hashCode; when it becomes invalid, due to any change to the 1.267 + // string, we note this by setting it to kInvalidHashCode. [LIU] 1.268 + 1.269 + // Note: This method is semantically const, but physically non-const. 1.270 + 1.271 + if (fHashCode == kInvalidHashCode) 1.272 + { 1.273 + fHashCode = computeHashCode(getBytes(), getLength()); 1.274 + } 1.275 + 1.276 + return fHashCode; 1.277 +} 1.278 + 1.279 +U_NAMESPACE_END 1.280 + 1.281 +U_CAPI int32_t U_EXPORT2 1.282 +ucol_keyHashCode(const uint8_t *key, 1.283 + int32_t length) 1.284 +{ 1.285 + return icu::computeHashCode(key, length); 1.286 +} 1.287 + 1.288 +#endif /* #if !UCONFIG_NO_COLLATION */