1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/unicode/sortkey.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,337 @@ 1.4 +/* 1.5 + ***************************************************************************** 1.6 + * Copyright (C) 1996-2013, International Business Machines Corporation and others. 1.7 + * All Rights Reserved. 1.8 + ***************************************************************************** 1.9 + * 1.10 + * File sortkey.h 1.11 + * 1.12 + * Created by: Helena Shih 1.13 + * 1.14 + * Modification History: 1.15 + * 1.16 + * Date Name Description 1.17 + * 1.18 + * 6/20/97 helena Java class name change. 1.19 + * 8/18/97 helena Added internal API documentation. 1.20 + * 6/26/98 erm Changed to use byte arrays and memcmp. 1.21 + ***************************************************************************** 1.22 + */ 1.23 + 1.24 +#ifndef SORTKEY_H 1.25 +#define SORTKEY_H 1.26 + 1.27 +#include "unicode/utypes.h" 1.28 + 1.29 +/** 1.30 + * \file 1.31 + * \brief C++ API: Keys for comparing strings multiple times. 1.32 + */ 1.33 + 1.34 +#if !UCONFIG_NO_COLLATION 1.35 + 1.36 +#include "unicode/uobject.h" 1.37 +#include "unicode/unistr.h" 1.38 +#include "unicode/coll.h" 1.39 + 1.40 +U_NAMESPACE_BEGIN 1.41 + 1.42 +/* forward declaration */ 1.43 +class RuleBasedCollator; 1.44 + 1.45 +/** 1.46 + * 1.47 + * Collation keys are generated by the Collator class. Use the CollationKey objects 1.48 + * instead of Collator to compare strings multiple times. A CollationKey 1.49 + * preprocesses the comparison information from the Collator object to 1.50 + * make the comparison faster. If you are not going to comparing strings 1.51 + * multiple times, then using the Collator object is generally faster, 1.52 + * since it only processes as much of the string as needed to make a 1.53 + * comparison. 1.54 + * <p> For example (with strength == tertiary) 1.55 + * <p>When comparing "Abernathy" to "Baggins-Smythworthy", Collator 1.56 + * only needs to process a couple of characters, while a comparison 1.57 + * with CollationKeys will process all of the characters. On the other hand, 1.58 + * if you are doing a sort of a number of fields, it is much faster to use 1.59 + * CollationKeys, since you will be comparing strings multiple times. 1.60 + * <p>Typical use of CollationKeys are in databases, where you store a CollationKey 1.61 + * in a hidden field, and use it for sorting or indexing. 1.62 + * 1.63 + * <p>Example of use: 1.64 + * <pre> 1.65 + * \code 1.66 + * UErrorCode success = U_ZERO_ERROR; 1.67 + * Collator* myCollator = Collator::createInstance(success); 1.68 + * CollationKey* keys = new CollationKey [3]; 1.69 + * myCollator->getCollationKey("Tom", keys[0], success ); 1.70 + * myCollator->getCollationKey("Dick", keys[1], success ); 1.71 + * myCollator->getCollationKey("Harry", keys[2], success ); 1.72 + * 1.73 + * // Inside body of sort routine, compare keys this way: 1.74 + * CollationKey tmp; 1.75 + * if(keys[0].compareTo( keys[1] ) > 0 ) { 1.76 + * tmp = keys[0]; keys[0] = keys[1]; keys[1] = tmp; 1.77 + * } 1.78 + * //... 1.79 + * \endcode 1.80 + * </pre> 1.81 + * <p>Because Collator::compare()'s algorithm is complex, it is faster to sort 1.82 + * long lists of words by retrieving collation keys with Collator::getCollationKey(). 1.83 + * You can then cache the collation keys and compare them using CollationKey::compareTo(). 1.84 + * <p> 1.85 + * <strong>Note:</strong> <code>Collator</code>s with different Locale, 1.86 + * CollationStrength and DecompositionMode settings will return different 1.87 + * CollationKeys for the same set of strings. Locales have specific 1.88 + * collation rules, and the way in which secondary and tertiary differences 1.89 + * are taken into account, for example, will result in different CollationKeys 1.90 + * for same strings. 1.91 + * <p> 1.92 + 1.93 + * @see Collator 1.94 + * @see RuleBasedCollator 1.95 + * @version 1.3 12/18/96 1.96 + * @author Helena Shih 1.97 + * @stable ICU 2.0 1.98 + */ 1.99 +class U_I18N_API CollationKey : public UObject { 1.100 +public: 1.101 + /** 1.102 + * This creates an empty collation key based on the null string. An empty 1.103 + * collation key contains no sorting information. When comparing two empty 1.104 + * collation keys, the result is Collator::EQUAL. Comparing empty collation key 1.105 + * with non-empty collation key is always Collator::LESS. 1.106 + * @stable ICU 2.0 1.107 + */ 1.108 + CollationKey(); 1.109 + 1.110 + 1.111 + /** 1.112 + * Creates a collation key based on the collation key values. 1.113 + * @param values the collation key values 1.114 + * @param count number of collation key values, including trailing nulls. 1.115 + * @stable ICU 2.0 1.116 + */ 1.117 + CollationKey(const uint8_t* values, 1.118 + int32_t count); 1.119 + 1.120 + /** 1.121 + * Copy constructor. 1.122 + * @param other the object to be copied. 1.123 + * @stable ICU 2.0 1.124 + */ 1.125 + CollationKey(const CollationKey& other); 1.126 + 1.127 + /** 1.128 + * Sort key destructor. 1.129 + * @stable ICU 2.0 1.130 + */ 1.131 + virtual ~CollationKey(); 1.132 + 1.133 + /** 1.134 + * Assignment operator 1.135 + * @param other the object to be copied. 1.136 + * @stable ICU 2.0 1.137 + */ 1.138 + const CollationKey& operator=(const CollationKey& other); 1.139 + 1.140 + /** 1.141 + * Compare if two collation keys are the same. 1.142 + * @param source the collation key to compare to. 1.143 + * @return Returns true if two collation keys are equal, false otherwise. 1.144 + * @stable ICU 2.0 1.145 + */ 1.146 + UBool operator==(const CollationKey& source) const; 1.147 + 1.148 + /** 1.149 + * Compare if two collation keys are not the same. 1.150 + * @param source the collation key to compare to. 1.151 + * @return Returns TRUE if two collation keys are different, FALSE otherwise. 1.152 + * @stable ICU 2.0 1.153 + */ 1.154 + UBool operator!=(const CollationKey& source) const; 1.155 + 1.156 + 1.157 + /** 1.158 + * Test to see if the key is in an invalid state. The key will be in an 1.159 + * invalid state if it couldn't allocate memory for some operation. 1.160 + * @return Returns TRUE if the key is in an invalid, FALSE otherwise. 1.161 + * @stable ICU 2.0 1.162 + */ 1.163 + UBool isBogus(void) const; 1.164 + 1.165 + /** 1.166 + * Returns a pointer to the collation key values. The storage is owned 1.167 + * by the collation key and the pointer will become invalid if the key 1.168 + * is deleted. 1.169 + * @param count the output parameter of number of collation key values, 1.170 + * including any trailing nulls. 1.171 + * @return a pointer to the collation key values. 1.172 + * @stable ICU 2.0 1.173 + */ 1.174 + const uint8_t* getByteArray(int32_t& count) const; 1.175 + 1.176 +#ifdef U_USE_COLLATION_KEY_DEPRECATES 1.177 + /** 1.178 + * Extracts the collation key values into a new array. The caller owns 1.179 + * this storage and should free it. 1.180 + * @param count the output parameter of number of collation key values, 1.181 + * including any trailing nulls. 1.182 + * @obsolete ICU 2.6. Use getByteArray instead since this API will be removed in that release. 1.183 + */ 1.184 + uint8_t* toByteArray(int32_t& count) const; 1.185 +#endif 1.186 + 1.187 +#ifndef U_HIDE_DEPRECATED_API 1.188 + /** 1.189 + * Convenience method which does a string(bit-wise) comparison of the 1.190 + * two collation keys. 1.191 + * @param target target collation key to be compared with 1.192 + * @return Returns Collator::LESS if sourceKey < targetKey, 1.193 + * Collator::GREATER if sourceKey > targetKey and Collator::EQUAL 1.194 + * otherwise. 1.195 + * @deprecated ICU 2.6 use the overload with error code 1.196 + */ 1.197 + Collator::EComparisonResult compareTo(const CollationKey& target) const; 1.198 +#endif /* U_HIDE_DEPRECATED_API */ 1.199 + 1.200 + /** 1.201 + * Convenience method which does a string(bit-wise) comparison of the 1.202 + * two collation keys. 1.203 + * @param target target collation key to be compared with 1.204 + * @param status error code 1.205 + * @return Returns UCOL_LESS if sourceKey < targetKey, 1.206 + * UCOL_GREATER if sourceKey > targetKey and UCOL_EQUAL 1.207 + * otherwise. 1.208 + * @stable ICU 2.6 1.209 + */ 1.210 + UCollationResult compareTo(const CollationKey& target, UErrorCode &status) const; 1.211 + 1.212 + /** 1.213 + * Creates an integer that is unique to the collation key. NOTE: this 1.214 + * is not the same as String.hashCode. 1.215 + * <p>Example of use: 1.216 + * <pre> 1.217 + * . UErrorCode status = U_ZERO_ERROR; 1.218 + * . Collator *myCollation = Collator::createInstance(Locale::US, status); 1.219 + * . if (U_FAILURE(status)) return; 1.220 + * . CollationKey key1, key2; 1.221 + * . UErrorCode status1 = U_ZERO_ERROR, status2 = U_ZERO_ERROR; 1.222 + * . myCollation->getCollationKey("abc", key1, status1); 1.223 + * . if (U_FAILURE(status1)) { delete myCollation; return; } 1.224 + * . myCollation->getCollationKey("ABC", key2, status2); 1.225 + * . if (U_FAILURE(status2)) { delete myCollation; return; } 1.226 + * . // key1.hashCode() != key2.hashCode() 1.227 + * </pre> 1.228 + * @return the hash value based on the string's collation order. 1.229 + * @see UnicodeString#hashCode 1.230 + * @stable ICU 2.0 1.231 + */ 1.232 + int32_t hashCode(void) const; 1.233 + 1.234 + /** 1.235 + * ICU "poor man's RTTI", returns a UClassID for the actual class. 1.236 + * @stable ICU 2.2 1.237 + */ 1.238 + virtual UClassID getDynamicClassID() const; 1.239 + 1.240 + /** 1.241 + * ICU "poor man's RTTI", returns a UClassID for this class. 1.242 + * @stable ICU 2.2 1.243 + */ 1.244 + static UClassID U_EXPORT2 getStaticClassID(); 1.245 + 1.246 +private: 1.247 + /** 1.248 + * Replaces the current bytes buffer with a new one of newCapacity 1.249 + * and copies length bytes from the old buffer to the new one. 1.250 + * @return the new buffer, or NULL if the allocation failed 1.251 + */ 1.252 + uint8_t *reallocate(int32_t newCapacity, int32_t length); 1.253 + /** 1.254 + * Set a new length for a new sort key in the existing fBytes. 1.255 + */ 1.256 + void setLength(int32_t newLength); 1.257 + 1.258 + uint8_t *getBytes() { 1.259 + return (fFlagAndLength >= 0) ? fUnion.fStackBuffer : fUnion.fFields.fBytes; 1.260 + } 1.261 + const uint8_t *getBytes() const { 1.262 + return (fFlagAndLength >= 0) ? fUnion.fStackBuffer : fUnion.fFields.fBytes; 1.263 + } 1.264 + int32_t getCapacity() const { 1.265 + return (fFlagAndLength >= 0) ? (int32_t)sizeof(fUnion) : fUnion.fFields.fCapacity; 1.266 + } 1.267 + int32_t getLength() const { return fFlagAndLength & 0x7fffffff; } 1.268 + 1.269 + /** 1.270 + * Set the CollationKey to a "bogus" or invalid state 1.271 + * @return this CollationKey 1.272 + */ 1.273 + CollationKey& setToBogus(void); 1.274 + /** 1.275 + * Resets this CollationKey to an empty state 1.276 + * @return this CollationKey 1.277 + */ 1.278 + CollationKey& reset(void); 1.279 + 1.280 + /** 1.281 + * Allow private access to RuleBasedCollator 1.282 + */ 1.283 + friend class RuleBasedCollator; 1.284 + friend class CollationKeyByteSink; 1.285 + 1.286 + // Class fields. sizeof(CollationKey) is intended to be 48 bytes 1.287 + // on a machine with 64-bit pointers. 1.288 + // We use a union to maximize the size of the internal buffer, 1.289 + // similar to UnicodeString but not as tight and complex. 1.290 + 1.291 + // (implicit) *vtable; 1.292 + /** 1.293 + * Sort key length and flag. 1.294 + * Bit 31 is set if the buffer is heap-allocated. 1.295 + * Bits 30..0 contain the sort key length. 1.296 + */ 1.297 + int32_t fFlagAndLength; 1.298 + /** 1.299 + * Unique hash value of this CollationKey. 1.300 + * Special value 2 if the key is bogus. 1.301 + */ 1.302 + mutable int32_t fHashCode; 1.303 + /** 1.304 + * fUnion provides 32 bytes for the internal buffer or for 1.305 + * pointer+capacity. 1.306 + */ 1.307 + union StackBufferOrFields { 1.308 + /** fStackBuffer is used iff fFlagAndLength>=0, else fFields is used */ 1.309 + uint8_t fStackBuffer[32]; 1.310 + struct { 1.311 + uint8_t *fBytes; 1.312 + int32_t fCapacity; 1.313 + } fFields; 1.314 + } fUnion; 1.315 +}; 1.316 + 1.317 +inline UBool 1.318 +CollationKey::operator!=(const CollationKey& other) const 1.319 +{ 1.320 + return !(*this == other); 1.321 +} 1.322 + 1.323 +inline UBool 1.324 +CollationKey::isBogus() const 1.325 +{ 1.326 + return fHashCode == 2; // kBogusHashCode 1.327 +} 1.328 + 1.329 +inline const uint8_t* 1.330 +CollationKey::getByteArray(int32_t &count) const 1.331 +{ 1.332 + count = getLength(); 1.333 + return getBytes(); 1.334 +} 1.335 + 1.336 +U_NAMESPACE_END 1.337 + 1.338 +#endif /* #if !UCONFIG_NO_COLLATION */ 1.339 + 1.340 +#endif